Skip to content

Commit

Permalink
Stricter DateTimeParser for known formats (fixes #569) (#4330)
Browse files Browse the repository at this point in the history
* GH #569: Cherry pick and correct code from devel-experimental.

* GH #569: Add missing timezone codes.

* enh(Foundation): DateTimeParser: stricter checks of timezones, more tests for invalid inputs. (#569)

* enh(Foundation): Small fixes of issues detected by CodeQL

---------

Co-authored-by: Alex Fabijanic <[email protected]>
  • Loading branch information
matejk and aleks-f committed Dec 11, 2023
1 parent 24b7122 commit 4f1cf68
Show file tree
Hide file tree
Showing 10 changed files with 339 additions and 61 deletions.
34 changes: 34 additions & 0 deletions Foundation/include/Poco/DateTimeFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,21 @@


#include "Poco/Foundation.h"
#include <unordered_set>


namespace Poco {


class RegularExpression;


class Foundation_API DateTimeFormat
/// Definition of date/time formats and various
/// constants used by DateTimeFormatter and DateTimeParser.
{
public:

// predefined date formats
static const std::string ISO8601_FORMAT;
/// The date/time format defined in the ISO 8601 standard.
Expand All @@ -37,6 +42,8 @@ class Foundation_API DateTimeFormat
/// 2005-01-01T12:00:00+01:00
/// 2005-01-01T11:00:00Z

static const std::string ISO8601_REGEX;

static const std::string ISO8601_FRAC_FORMAT;
/// The date/time format defined in the ISO 8601 standard,
/// with fractional seconds.
Expand All @@ -52,13 +59,17 @@ class Foundation_API DateTimeFormat
/// Sat, 1 Jan 05 12:00:00 +0100
/// Sat, 1 Jan 05 11:00:00 GMT

static const std::string RFC822_REGEX;

static const std::string RFC1123_FORMAT;
/// The date/time format defined in RFC 1123 (obsoletes RFC 822).
///
/// Examples:
/// Sat, 1 Jan 2005 12:00:00 +0100
/// Sat, 1 Jan 2005 11:00:00 GMT

static const std::string RFC1123_REGEX;

static const std::string HTTP_FORMAT;
/// The date/time format defined in the HTTP specification (RFC 2616),
/// which is basically a variant of RFC 1036 with a zero-padded day field.
Expand All @@ -67,39 +78,62 @@ class Foundation_API DateTimeFormat
/// Sat, 01 Jan 2005 12:00:00 +0100
/// Sat, 01 Jan 2005 11:00:00 GMT

static const std::string HTTP_REGEX;

static const std::string RFC850_FORMAT;
/// The date/time format defined in RFC 850 (obsoleted by RFC 1036).
///
/// Examples:
/// Saturday, 1-Jan-05 12:00:00 +0100
/// Saturday, 1-Jan-05 11:00:00 GMT

static const std::string RFC850_REGEX;

static const std::string RFC1036_FORMAT;
/// The date/time format defined in RFC 1036 (obsoletes RFC 850).
///
/// Examples:
/// Saturday, 1 Jan 05 12:00:00 +0100
/// Saturday, 1 Jan 05 11:00:00 GMT

static const std::string RFC1036_REGEX;

static const std::string ASCTIME_FORMAT;
/// The date/time format produced by the ANSI C asctime() function.
///
/// Example:
/// Sat Jan 1 12:00:00 2005

static const std::string ASCTIME_REGEX;

static const std::string SORTABLE_FORMAT;
/// A simple, sortable date/time format.
///
/// Example:
/// 2005-01-01 12:00:00

static const std::string SORTABLE_REGEX;
// ^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)$

// names used by formatter and parser
static const std::string WEEKDAY_NAMES[7];
/// English names of week days (Sunday, Monday, Tuesday, ...).

static const std::string MONTH_NAMES[12];
/// English names of months (January, February, ...).

static bool hasFormat(const std::string& fmt);
/// Returns true if fmt is a known standard format.
///
/// "Known" means that fmt is equal to one of the
/// predefined format strings stored in FORMAT_LIST.

static bool isValid(const std::string& dateTime);
/// Returns true if dateTime validates against at least one supported format.
///
/// The string is matched against the regular expressions
/// referenced by REGEX_LIST until one of them matches.

typedef std::unordered_set<const std::string*> RegexList;
static RegexList REGEX_LIST;
/// Pointers to the regular expression strings tried by isValid().
/// The set holds pointers, not copies, of the pattern strings.

private:
typedef std::unordered_set<std::string> Formatlist;
static Formatlist FORMAT_LIST;
/// Copies of the predefined format strings looked up by hasFormat().
};


Expand Down
2 changes: 2 additions & 0 deletions Foundation/include/Poco/DateTimeParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class Foundation_API DateTimeParser
/// If more strict format validation of date/time strings is required, a regular
/// expression could be used for initial validation, before passing the string
/// to DateTimeParser.
/// TODO: Correct description

{
public:
static void parse(const std::string& fmt, const std::string& str, DateTime& dateTime, int& timeZoneDifferential);
Expand Down
34 changes: 24 additions & 10 deletions Foundation/src/DateTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include "Poco/DateTime.h"
#include "Poco/Timespan.h"
#include "Poco/Exception.h"
#include "Poco/Format.h"
#include <algorithm>
#include <cmath>
#include <ctime>
Expand Down Expand Up @@ -70,16 +72,28 @@ DateTime::DateTime(int year, int month, int day, int hour, int minute, int secon
_millisecond(millisecond),
_microsecond(microsecond)
{
poco_assert (year >= 0 && year <= 9999);
poco_assert (month >= 1 && month <= 12);
poco_assert (day >= 1 && day <= daysOfMonth(year, month));
poco_assert (hour >= 0 && hour <= 23);
poco_assert (minute >= 0 && minute <= 59);
poco_assert (second >= 0 && second <= 60); // allow leap seconds
poco_assert (millisecond >= 0 && millisecond <= 999);
poco_assert (microsecond >= 0 && microsecond <= 999);

_utcTime = toUtcTime(toJulianDay(year, month, day)) + 10*(hour*Timespan::HOURS + minute*Timespan::MINUTES + second*Timespan::SECONDS + millisecond*Timespan::MILLISECONDS + microsecond);
if (isValid(_year, _month, _day, _hour, _minute, _second, _millisecond, _microsecond))
{
_utcTime = toUtcTime(toJulianDay(year, month, day)) +
10 * (hour*Timespan::HOURS + minute*Timespan::MINUTES + second*Timespan::SECONDS +
millisecond*Timespan::MILLISECONDS + microsecond);
}
else
{
throw Poco::InvalidArgumentException(Poco::format("Date time is %d-%d-%dT%d:%d:%d.%d.%d\n"
"Valid values:\n"
"0 <= year <= 9999\n"
"1 <= month <= 12\n"
"1 <= day <= %d\n"
"0 <= hour <= 23\n"
"0 <= minute <= 59\n"
"0 <= second <= 59\n"
"0 <= millisecond <= 999\n"
"0 <= microsecond <= 999",
_year, _month, _day, _hour, _minute,
_second, _millisecond, _microsecond,
daysOfMonth(_year, _month)));
}
}


Expand Down
100 changes: 100 additions & 0 deletions Foundation/src/DateTimeFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,92 @@
//


#include <Poco/Exception.h>
#include "Poco/DateTimeFormat.h"
#include "Poco/RegularExpression.h"


namespace Poco {

// NOTE: Must be in sync with DateTimeParser::parseTZD
// TODO: Validate timezone strings separately and simplify regex?
//
// Alternation of the named time zone abbreviations accepted by the RFC-style
// regular expressions below. Every abbreviation is its own capture group and
// is listed exactly once. The fragment is NOT wrapped in a group of its own,
// so it must always be embedded inside an enclosing pair of parentheses.
#define TIMEZONES_REGEX_PART \
	"(UT)|(GMT)|(BST)|(IST)|(WET)|(WEST)|(CET)|(CEST)|(EET)|(EEST)|(MSK)|" \
	"(MSD)|(NST)|(NDT)|(AST)|(ADT)|(EST)|(EDT)|(CST)|(CDT)|(MST)|(MDT)|(PST)|" \
	"(PDT)|(AKST)|(AKDT)|(HST)|(AEST)|(AEDT)|(ACST)|(ACDT)|(AWST)|(AWDT)"

// ISO 8601: format strings and the regular expression used to validate input.
// ISO8601_FRAC_FORMAT differs from ISO8601_FORMAT only in the seconds
// specifier (%s instead of %S).
const std::string DateTimeFormat::ISO8601_FORMAT("%Y-%m-%dT%H:%M:%S%z");
const std::string DateTimeFormat::ISO8601_FRAC_FORMAT("%Y-%m-%dT%H:%M:%s%z");
// Pattern layout, in order:
//  - an optionally signed four-digit year;
//  - an optional date part: month (01-12) with optional day (01-31), or
//    'W' week (00-52) with optional weekday (1-7), or a three-digit
//    day of year (001-366);
//  - an optional time part introduced by 'T' or whitespace: hour (00-23, or
//    24:00), optional minutes and seconds with optional '.'/',' fractions, and
//    an optional zone (a single letter A-I or K-Z, or a signed hh[:mm] offset).
// Back-reference \3 repeats the (possibly empty) '-' date separator and \17 the
// (possibly empty) ':' time separator, so each separator must be used
// consistently. Do not add or remove capture groups before them without
// renumbering these back-references.
const std::string DateTimeFormat::ISO8601_REGEX("([\\+-]?\\d{4}(?!\\d{2}\\b))"
"((-?)"
"((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|"
"(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))"
"([T\\s]"
"((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?"
"(\\17[0-5]\\d([\\.,]\\d+)?)?([A-I]|[K-Z]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?");

// RFC 822: format string and validation pattern.
const std::string DateTimeFormat::RFC822_FORMAT("%w, %e %b %y %H:%M:%S %Z");

// Pattern layout, in order: an optional abbreviated weekday followed by a
// comma; a one- or two-digit day; an abbreviated month name; a two- or
// four-digit year; hh:mm with optional :ss; and finally a zone, which is
// either an optionally signed four-digit offset, one of the abbreviations in
// TIMEZONES_REGEX_PART, or a single word character (\w).
// Fields are separated by one or more spaces.
const std::string DateTimeFormat::RFC822_REGEX("(((Mon)|(Tue)|(Wed)|(Thu)|(Fri)|(Sat)|(Sun)), *)?"
"\\d\\d? +"
"((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|(Nov)|(Dec)) +"
"\\d\\d(\\d\\d)? +"
"\\d\\d:\\d\\d(:\\d\\d)? +"
"(([+\\-]?\\d\\d\\d\\d)|" TIMEZONES_REGEX_PART "|\\w)");

// RFC 1123: the format string differs from RFC822_FORMAT only in the year
// specifier (%Y instead of %y). The validation pattern is a copy of
// RFC822_REGEX, which already accepts two- and four-digit years; it relies on
// RFC822_REGEX being defined earlier in this translation unit.
const std::string DateTimeFormat::RFC1123_FORMAT("%w, %e %b %Y %H:%M:%S %Z");
const std::string DateTimeFormat::RFC1123_REGEX(DateTimeFormat::RFC822_REGEX);

// HTTP date: the format string differs from RFC1123_FORMAT only in the day
// specifier (%d instead of %e).
const std::string DateTimeFormat::HTTP_FORMAT("%w, %d %b %Y %H:%M:%S %Z");
// Pattern layout: optional abbreviated weekday and comma, one- or two-digit
// day, abbreviated month, two- or four-digit year, hh:mm with optional :ss,
// then exactly one space followed by the zone part. The zone part is an
// optional zone abbreviation (the "?+" quantifier; presumably possessive in
// the regex dialect used) directly followed by an optional signed four-digit
// offset, another zone abbreviation, or a single word character.
// NOTE(review): as written, both zone groups can match the empty string.
const std::string DateTimeFormat::HTTP_REGEX("(((Mon)|(Tue)|(Wed)|(Thu)|(Fri)|(Sat)|(Sun)), *)?"
"\\d\\d? +"
"((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|(Nov)|(Dec)) +"
"\\d\\d(\\d\\d)? +\\d\\d:\\d\\d(:\\d\\d)? "
"(" TIMEZONES_REGEX_PART "|)?+"
"(([+\\-]?\\d\\d\\d\\d)?|" TIMEZONES_REGEX_PART "|\\w)");

// RFC 850: format string and validation pattern.
const std::string DateTimeFormat::RFC850_FORMAT("%W, %e-%b-%y %H:%M:%S %Z");
// Pattern layout: an optional weekday (full or abbreviated name) followed by a
// comma; day, abbreviated month and two- or four-digit year joined by '-';
// hh:mm with optional :ss; exactly one space; then the zone part (optional
// zone abbreviation followed by an optional signed four-digit offset, another
// zone abbreviation, or a single word character).
// NOTE(review): as written, both zone groups can match the empty string.
const std::string DateTimeFormat::RFC850_REGEX(
"(((Monday)|(Tuesday)|(Wednesday)|(Thursday)|(Friday)|(Saturday)|(Sunday)|"
"(Mon)|(Tue)|(Wed)|(Thu)|(Fri)|(Sat)|(Sun)), *)?"
"\\d\\d?-((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|(Nov)|(Dec))-"
"\\d\\d(\\d\\d)? +\\d\\d:\\d\\d(:\\d\\d)? "
"(" TIMEZONES_REGEX_PART "|)?+"
"(([+\\-]?\\d\\d\\d\\d)?|" TIMEZONES_REGEX_PART "|\\w)");

// RFC 1036: format string and validation pattern.
const std::string DateTimeFormat::RFC1036_FORMAT("%W, %e %b %y %H:%M:%S %Z");
// Pattern layout: an optional weekday followed by a comma; a one- or two-digit
// day; an abbreviated month name; a two- or four-digit year; hh:mm with
// optional :ss; exactly one space; then the zone part (optional zone
// abbreviation followed by an optional signed four-digit offset, another zone
// abbreviation, or a single word character).
// The weekday is written with its full name (RFC1036_FORMAT uses %W). Sunday is
// spelled "(Sun(?:day)?)" so that the full name "Sunday" is accepted while the
// abbreviation "Sun" stays valid; the non-capturing inner group keeps the
// number of capture groups unchanged.
const std::string DateTimeFormat::RFC1036_REGEX(
	"(((Monday)|(Tuesday)|(Wednesday)|(Thursday)|(Friday)|(Saturday)|(Sun(?:day)?)), *)?"
	"\\d\\d? +"
	"((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|(Nov)|(Dec)) +"
	"\\d\\d(\\d\\d)? +\\d\\d:\\d\\d(:\\d\\d)? "
	"(" TIMEZONES_REGEX_PART "|)?+"
	"(([+\\-]?\\d\\d\\d\\d)?|" TIMEZONES_REGEX_PART "|\\w)");

// It would perhaps be useful to add RFC 2822 (successor of 822)
// https://www.rfc-editor.org/rfc/rfc2822#section-3.3

// asctime(): format string and validation pattern.
const std::string DateTimeFormat::ASCTIME_FORMAT("%w %b %f %H:%M:%S %Y");
// Pattern layout: a mandatory abbreviated weekday, an abbreviated month name,
// a one- or two-digit day, hh:mm:ss (seconds are mandatory here) and a
// four-digit year, separated by one or more spaces. There is no zone field.
const std::string DateTimeFormat::ASCTIME_REGEX("((Mon)|(Tue)|(Wed)|(Thu)|(Fri)|(Sat)|(Sun)) +"
"((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|(Nov)|(Dec)) +"
"\\d\\d? +\\d\\d:\\d\\d:\\d\\d +(\\d\\d\\d\\d)");

// Sortable format: format string and validation pattern.
// The pattern checks only the digit layout "dddd-dd-dd dd:dd:dd" (a single
// space between date and time); it does not range-check the individual fields.
const std::string DateTimeFormat::SORTABLE_FORMAT("%Y-%m-%d %H:%M:%S");
const std::string DateTimeFormat::SORTABLE_REGEX("(\\d\\d\\d\\d-\\d\\d-\\d\\d \\d\\d:\\d\\d:\\d\\d)");


// Set of the predefined format strings, consulted by hasFormat().
// The entries are copies of the constants defined earlier in this file, so
// this definition must stay below them.
DateTimeFormat::Formatlist DateTimeFormat::FORMAT_LIST =
{
	ISO8601_FORMAT,
	ISO8601_FRAC_FORMAT,
	RFC822_FORMAT,
	RFC1123_FORMAT,
	HTTP_FORMAT,
	RFC850_FORMAT,
	RFC1036_FORMAT,
	ASCTIME_FORMAT,
	SORTABLE_FORMAT
};


const std::string DateTimeFormat::WEEKDAY_NAMES[] =
Expand Down Expand Up @@ -58,4 +129,33 @@ const std::string DateTimeFormat::MONTH_NAMES[] =
};


// Set of pointers to the validation patterns, iterated by isValid().
// Only addresses are stored, so the pattern strings themselves are not copied.
DateTimeFormat::RegexList DateTimeFormat::REGEX_LIST
{
	&ISO8601_REGEX,
	&RFC822_REGEX,
	&RFC1123_REGEX,
	&HTTP_REGEX,
	&RFC850_REGEX,
	&RFC1036_REGEX,
	&ASCTIME_REGEX,
	&SORTABLE_REGEX
};


bool DateTimeFormat::hasFormat(const std::string& fmt)
	/// Returns true if fmt is equal to one of the predefined
	/// format strings stored in FORMAT_LIST.
{
	const bool known = FORMAT_LIST.count(fmt) != 0;
	return known;
}


bool DateTimeFormat::isValid(const std::string& dateTime)
{
for (const auto& f : REGEX_LIST)
{
if (RegularExpression(*f).match(dateTime)) return true;
}
return false;
}


} // namespace Poco
Loading

0 comments on commit 4f1cf68

Please sign in to comment.