diff --git a/src/main/java/liqp/filters/date/BasicDateParser.java b/src/main/java/liqp/filters/date/BasicDateParser.java index ae41f703..920f35f1 100644 --- a/src/main/java/liqp/filters/date/BasicDateParser.java +++ b/src/main/java/liqp/filters/date/BasicDateParser.java @@ -36,7 +36,7 @@ public abstract class BasicDateParser { this.put("9th", "9"); this.put("0th", "0"); }}; - protected String removeSequentialSuffixes(String input) { + public static String removeSequentialSuffixes(String input) { for (Map.Entry entry : toBeReplaced.entrySet()) { input = input.replaceAll("(?i)"+entry.getKey(), entry.getValue()); } diff --git a/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java b/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java index 70cecbf9..fd49563e 100644 --- a/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java +++ b/src/main/java/liqp/filters/date/fuzzy/DatePatternRecognizingContext.java @@ -4,12 +4,12 @@ public class DatePatternRecognizingContext { - final Locale locale; - Boolean hasYear; - Boolean hasMonth; - Boolean hasDay; - Boolean weekDay; - Boolean hasTime; + public final Locale locale; + public Boolean hasYear; + public Boolean hasMonth; + public Boolean hasDate; + public Boolean weekDay; + public Boolean hasTime; public DatePatternRecognizingContext(Locale locale) { if (locale == null) { diff --git a/src/main/java/liqp/filters/date/fuzzy/LookupResult.java b/src/main/java/liqp/filters/date/fuzzy/LookupResult.java index 669cca45..59ae1b59 100644 --- a/src/main/java/liqp/filters/date/fuzzy/LookupResult.java +++ b/src/main/java/liqp/filters/date/fuzzy/LookupResult.java @@ -13,4 +13,8 @@ class LookupResult { this.parts = parts; this.found = found; } + + public String getName() { + return name; + } } diff --git a/src/main/java/liqp/filters/date/fuzzy/Part.java b/src/main/java/liqp/filters/date/fuzzy/Part.java index 9d5218ca..7b85ecc6 100644 --- a/src/main/java/liqp/filters/date/fuzzy/Part.java +++ b/src/main/java/liqp/filters/date/fuzzy/Part.java @@ -102,7 +102,7 @@ public String toString() { class RecognizedPart implements Part { final int start; final int end; - private final List patterns; + protected final List patterns; public final String source; RecognizedPart(int start, int end, List patterns, String source) { @@ -145,4 +145,19 @@ public String toString() { '}'; } } + + class RecognizedMonthNamePart extends RecognizedPart { + RecognizedMonthNamePart(int start, int end, List patterns, String source) { + super(start, end, patterns, source); + } + + @Override + public String toString() { + return "RecognizedMonthNamePart{" + + "start=" + start + + ", end=" + end + + ", pattern='" + patterns + '\'' + + '}'; + } + } } diff --git a/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java b/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java index b872f508..38417564 100644 --- a/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/PartExtractor.java @@ -1,6 +1,5 @@ package liqp.filters.date.fuzzy; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.function.Supplier; @@ -9,7 +8,7 @@ public interface PartExtractor { - PartExtractorResult extract(String source); + PartExtractorResult extract(String source, List parts, int i); default List newList(String... el) { return Arrays.asList(el); diff --git a/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java b/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java index 143d3ddc..e6f3f217 100644 --- a/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java +++ b/src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java @@ -1,11 +1,11 @@ package liqp.filters.date.fuzzy; import static liqp.filters.date.fuzzy.extractors.Extractors.allYMDPatternExtractor; -import static liqp.filters.date.fuzzy.extractors.Extractors.fullMonthExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.fullWeekdaysExtractor; +import static liqp.filters.date.fuzzy.extractors.Extractors.monthDateExtractor; +import static liqp.filters.date.fuzzy.extractors.Extractors.monthExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.plainYearExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.regularTimeExtractor; -import static liqp.filters.date.fuzzy.extractors.Extractors.shortMonthExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.shortWeekdaysExtractor; import static liqp.filters.date.fuzzy.extractors.Extractors.yearWithEraExtractor; @@ -14,6 +14,7 @@ import java.util.List; import liqp.filters.date.fuzzy.Part.NewPart; import liqp.filters.date.fuzzy.Part.PunctuationPart; +import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart; import liqp.filters.date.fuzzy.Part.RecognizedPart; import liqp.filters.date.fuzzy.Part.UnrecognizedPart; import liqp.filters.date.fuzzy.extractors.PartExtractorResult; @@ -58,7 +59,7 @@ List recognizePart(List parts, DatePatternRecognizingContext ctx) { if (result.found) { ctx.hasYear = true; ctx.hasMonth = true; - ctx.hasDay = true; + ctx.hasDate = true; return result.parts; } } @@ -72,41 +73,32 @@ List recognizePart(List parts, DatePatternRecognizingContext ctx) { // last "year check" and since we are here - there is no year ctx.hasYear = false; } - if (notSet(ctx.hasMonth)) { - LookupResult result = lookup(parts, fullMonthExtractor.get(ctx.locale)); - if (result.found) { - ctx.hasMonth = true; - return result.parts; - } - result = lookup(parts, shortMonthExtractor.get(ctx.locale)); + if (notSet(ctx.hasMonth)) { + LookupResult result = lookup(parts, monthExtractor.get(ctx.locale)); if (result.found) { ctx.hasMonth = true; return result.parts; } - ctx.hasMonth = false; } - if (notSet(ctx.hasDay)) { - LookupResult result = lookup(parts, fullMonthExtractor.get(ctx.locale)); + if (isTrue(ctx.hasMonth) && notSet(ctx.hasDate)) { + LookupResult result = lookup(parts, monthDateExtractor.get(ctx.locale)); if (result.found) { - ctx.hasDay = true; + ctx.hasDate = true; return result.parts; } - - result = lookup(parts, shortMonthExtractor.get(ctx.locale)); - if (result.found) { - ctx.hasDay = true; - return result.parts; - } - - ctx.hasDay = false; + ctx.hasDate = false; } return markAsUnrecognized(parts); } + private boolean isTrue(Boolean hasMonth) { + return hasMonth != null && hasMonth; + } + private boolean notSet(Boolean val) { return val == null; } @@ -116,7 +108,7 @@ private LookupResult lookup(List parts, PartExtractor partExtractor) { if (part.state() == Part.PartState.NEW) { String source = part.source(); - PartExtractorResult per = partExtractor.extract(source); + PartExtractorResult per = partExtractor.extract(source, parts, i); if (per.found) { parts.remove(i); @@ -125,7 +117,12 @@ private LookupResult lookup(List parts, PartExtractor partExtractor) { parts.add(i, after); } - RecognizedPart recognized = new RecognizedPart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(per.start, per.end)); + RecognizedPart recognized; + if (per.isMonthName) { + recognized = new RecognizedMonthNamePart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(per.start, per.end)); + } else { + recognized = new RecognizedPart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(per.start, per.end)); + } parts.add(i, recognized); if (per.start != 0) { diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java index f3ff761e..436e4657 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/AllYMDPatternExtractor.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.List; +import liqp.filters.date.fuzzy.Part; import liqp.filters.date.fuzzy.PartExtractor; public class AllYMDPatternExtractor implements PartExtractor { @@ -51,9 +52,9 @@ public AllYMDPatternExtractor() { } @Override - public PartExtractorResult extract(String source) { + public PartExtractorResult extract(String source, List parts, int i) { for (AnyYMDPatternExtractor extractor : extractors) { - PartExtractorResult result = extractor.extract(source); + PartExtractorResult result = extractor.extract(source, parts, i); if (result.found) { return result; } diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java index e32ae0cc..ff8a21eb 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/AnyYMDPatternExtractor.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.Optional; import java.util.regex.Matcher; +import liqp.filters.date.fuzzy.Part; class AnyYMDPatternExtractor extends RegexPartExtractor { @@ -78,7 +79,7 @@ private static String reconstructPattern(RulePart[] partsInOrder) { } @Override - public PartExtractorResult extract(String source) { + public PartExtractorResult extract(String source, List parts, int i) { Matcher matcher = pattern.matcher(source); if (matcher.find()) { PartExtractorResult result = new PartExtractorResult(name); diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java b/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java index 8080ce88..942c178a 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/Extractors.java @@ -52,20 +52,35 @@ public PartExtractor get(Locale locale) { return partExtractor; } }, - fullMonthExtractor { + monthExtractor { private final Map extractors = new HashMap<>(); + @Override public PartExtractor get(Locale locale) { - return extractors.computeIfAbsent(locale, l -> new FullMonthExtractor(locale)); + return extractors.computeIfAbsent(locale, l -> new MonthExtractor(locale)); } }, - shortMonthExtractor { - private final Map extractors = new HashMap<>(); + monthDateExtractor { + private final PartExtractor partExtractor = new MonthDateExtractor(); @Override public PartExtractor get(Locale locale) { - return extractors.computeIfAbsent(locale, l -> new ShortMonthExtractor(locale)); + return partExtractor; } }, +// fullMonthExtractor { +// private final Map extractors = new HashMap<>(); +// @Override +// public PartExtractor get(Locale locale) { +// return extractors.computeIfAbsent(locale, l -> new FullMonthExtractor(locale)); +// } +// }, +// shortMonthExtractor { +// private final Map extractors = new HashMap<>(); +// @Override +// public PartExtractor get(Locale locale) { +// return extractors.computeIfAbsent(locale, l -> new ShortMonthExtractor(locale)); +// } +// }, /** * 2011-12-03 * diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/FullMonthExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/FullMonthExtractor.java deleted file mode 100644 index 7334639a..00000000 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/FullMonthExtractor.java +++ /dev/null @@ -1,16 +0,0 @@ -package liqp.filters.date.fuzzy.extractors; - -import java.text.DateFormatSymbols; -import java.util.Locale; - -class FullMonthExtractor extends EnumExtractor { - - public FullMonthExtractor(Locale locale) { - super("FullMonthExtractor", locale, "MMMM"); - } - - @Override - protected String[] getEnumValues(Locale locale) { - return new DateFormatSymbols(locale).getMonths(); - } -} diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java new file mode 100644 index 00000000..1c845c8f --- /dev/null +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java @@ -0,0 +1,81 @@ +package liqp.filters.date.fuzzy.extractors; + +import java.util.List; +import java.util.regex.Matcher; +import liqp.filters.date.fuzzy.Part; +import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart; +import liqp.filters.date.fuzzy.Part.RecognizedPart; +import liqp.filters.date.fuzzy.PartExtractor; + +public class MonthDateExtractor implements PartExtractor { + + @Override + public PartExtractorResult extract(String source, List parts, int i) { + // closest right or closest left should be a month + if (rightIsMonth(parts, i)) { + return leftDateExtractor.extract(source, parts, i); + } + if (leftIsMonth(parts, i)) { + return rightDateExtractor.extract(source, parts, i); + } + return new PartExtractorResult("MonthDateExtractor"); + } + + private boolean leftIsMonth(List parts, int i) { + int left = i - 1; + while (left >= 0) { + Part part = parts.get(left); + if (part instanceof RecognizedMonthNamePart) { + return true; + } + if (part instanceof RecognizedPart) { + return false; + } + left--; + } + return false; + } + + private boolean rightIsMonth(List parts, int i) { + int right = i + 1; + while (right < parts.size()) { + Part part = parts.get(right); + if (part instanceof RecognizedMonthNamePart) { + return true; + } + if (part instanceof RecognizedPart) { + return false; + } + right++; + } + return false; + } + + + private static final RegexPartExtractor leftDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.left", "(?:^|.*?\\D)(?0?[1-9]|[12][0-9]|3[01])\\D+?$"); + private static final RegexPartExtractor rightDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.right", "^\\D+?(?0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)"); + private static class MonthDatePartExtractor extends RegexPartExtractor { + + public MonthDatePartExtractor(String name, String regex) { + super(name, regex, null); + } + + @Override + public PartExtractorResult extract(String source, List parts, int i) { + Matcher matcher = pattern.matcher(source); + if (matcher.find()) { + PartExtractorResult result = new PartExtractorResult(name); + result.found = true; + result.start = matcher.start("day"); + result.end = matcher.end("day"); + if (matcher.group("day").length() == 1) { + result.formatterPatterns = newList("d", "dd"); + } else { + result.formatterPatterns = newList("dd", "d"); + } + return result; + } + return new PartExtractorResult(name); + } + } +} diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java new file mode 100644 index 00000000..3b0783c9 --- /dev/null +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/MonthExtractor.java @@ -0,0 +1,41 @@ +package liqp.filters.date.fuzzy.extractors; + +import java.text.DateFormatSymbols; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import liqp.filters.date.fuzzy.Part; +import liqp.filters.date.fuzzy.PartExtractor; + +public class MonthExtractor implements PartExtractor { + private final List monthExtractors; + + public MonthExtractor(Locale locale) { + this.monthExtractors = new ArrayList<>(); + this.monthExtractors.add(new EnumExtractor("FullMonthExtractor", locale, "MMMM") { + @Override + protected String[] getEnumValues(Locale locale) { + return new DateFormatSymbols(locale).getMonths(); + } + }); + this.monthExtractors.add(new EnumExtractor("ShortMonthExtractor", locale, "MMM") { + @Override + protected String[] getEnumValues(Locale locale) { + return new DateFormatSymbols(locale).getShortMonths(); + } + }); + + } + @Override + public PartExtractorResult extract(String source, List parts, int i) { + PartExtractorResult res = new PartExtractorResult("MonthDayExtractor"); + for (EnumExtractor monthExtractor : monthExtractors) { + PartExtractorResult monthResult = monthExtractor.extract(source, parts, i); + if (monthResult.found) { + monthResult.isMonthName = true; + return monthResult; + } + } + return res; + } +} diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java index 515215c9..91a438f5 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorDelegate.java @@ -1,5 +1,7 @@ package liqp.filters.date.fuzzy.extractors; +import java.util.List; +import liqp.filters.date.fuzzy.Part; import liqp.filters.date.fuzzy.PartExtractor; public class PartExtractorDelegate implements PartExtractor { @@ -7,7 +9,7 @@ public class PartExtractorDelegate implements PartExtractor { protected PartExtractor delegate; @Override - public PartExtractorResult extract(String source) { - return delegate.extract(source); + public PartExtractorResult extract(String source, List parts, int i) { + return delegate.extract(source, parts, i); } } diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java index adcabf31..513651a9 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/PartExtractorResult.java @@ -6,6 +6,12 @@ public class PartExtractorResult { public final String extractorName; + public boolean found; + public int start; + public int end; + public List formatterPatterns; + public boolean isMonthName; + public PartExtractorResult(String extractorName){ this.extractorName = extractorName; @@ -16,9 +22,4 @@ public PartExtractorResult(String extractorName, String formatterPattern){ this.formatterPatterns = new ArrayList<>(); this.formatterPatterns.add(formatterPattern); } - - public boolean found; - public int start; - public int end; - public List formatterPatterns; } diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java index 7cf65751..a18a90ad 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/RegexPartExtractor.java @@ -1,7 +1,9 @@ package liqp.filters.date.fuzzy.extractors; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import liqp.filters.date.fuzzy.Part; import liqp.filters.date.fuzzy.PartExtractor; class RegexPartExtractor implements PartExtractor { @@ -17,7 +19,7 @@ public RegexPartExtractor(String name, String regex, String formatterPattern) { } @Override - public PartExtractorResult extract(String source) { + public PartExtractorResult extract(String source, List parts, int i) { Matcher matcher = pattern.matcher(source); if (matcher.find()) { PartExtractorResult result = new PartExtractorResult(name, formatterPattern); diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/RegularTimeExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/RegularTimeExtractor.java index a9fd5110..53f7de9c 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/RegularTimeExtractor.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/RegularTimeExtractor.java @@ -1,6 +1,8 @@ package liqp.filters.date.fuzzy.extractors; +import java.util.List; import java.util.regex.Matcher; +import liqp.filters.date.fuzzy.Part; class RegularTimeExtractor extends RegexPartExtractor { @@ -20,7 +22,7 @@ public RegularTimeExtractor() { } @Override - public PartExtractorResult extract(String source) { + public PartExtractorResult extract(String source, List parts, int i) { Matcher m = pattern.matcher(source); if (m.matches()) { PartExtractorResult r = new PartExtractorResult("RegularTimeExtractor"); diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/ShortMonthExtractor.java b/src/main/java/liqp/filters/date/fuzzy/extractors/ShortMonthExtractor.java deleted file mode 100644 index 04d9a7f3..00000000 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/ShortMonthExtractor.java +++ /dev/null @@ -1,16 +0,0 @@ -package liqp.filters.date.fuzzy.extractors; - -import java.text.DateFormatSymbols; -import java.util.Locale; - -class ShortMonthExtractor extends EnumExtractor { - - public ShortMonthExtractor(Locale locale) { - super("ShortMonthExtractor", locale, "MMM"); - } - - @Override - protected String[] getEnumValues(Locale locale) { - return new DateFormatSymbols(locale).getShortMonths(); - } -} diff --git a/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java b/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java index 35d35e0a..34d4518e 100644 --- a/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java +++ b/src/main/java/liqp/filters/date/fuzzy/extractors/YearWithEra.java @@ -3,7 +3,9 @@ import static liqp.LValue.isBlank; import static liqp.filters.date.fuzzy.extractors.RegularTimeExtractor.repeat; +import java.util.List; import java.util.regex.Matcher; +import liqp.filters.date.fuzzy.Part; class YearWithEra extends RegexPartExtractor { @@ -13,7 +15,7 @@ public YearWithEra() { } @Override - public PartExtractorResult extract(String source) { + public PartExtractorResult extract(String source, List parts, int i) { Matcher matcher = pattern.matcher(source); if (matcher.find()) { PartExtractorResult result = new PartExtractorResult("YearWithEra"); diff --git a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedTest.java b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedTest.java index c99ef8f4..b7a59c11 100644 --- a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedTest.java +++ b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserParametrizedTest.java @@ -64,19 +64,19 @@ public static Collection data() { {null, " 01:23:45.678 ", " HH:mm:ss.SSS "}, {null, " 1:23:45.678 am ", " h:mm:ss.SSS a "}, {null, " 1:23:45.678 PM ", " h:mm:ss.SSS a "}, - {null, "12 Jan 1995T01:23:45.678", "'12' MMM yyyy'T'HH:mm:ss.SSS"}, + {null, "12 Jan 1995T01:23:45.678", "dd MMM yyyy'T'HH:mm:ss.SSS"}, {null, "12 AD", "yy GG"}, {null, " 12 AD ", " yy GG "}, {null, " 12 Anno Domini ", " yy GGGG "}, {null, " 12345 Before Christ ", " yyyyy GGGG "}, {null, " 1 BC ", " y GG "}, - {null, "12 January", "'12' MMMM"}, - {null, " 12 January ", " '12' MMMM "}, - {null, "12 Jan", "'12' MMM"}, - {null, " 12 Jan ", " '12' MMM "}, + {null, "12 January", "dd MMMM"}, + {null, " 12 January ", " dd MMMM "}, + {null, "12 Jan", "dd MMM"}, + {null, " 12 Jan ", " dd MMM "}, - {null, " 12 BC 12 Jan 01:23:45.678 ", " yy GG '12' MMM HH:mm:ss.SSS "}, - {null, "12 Jan 01:23:45.678 12 Anno Domini", "'12' MMM HH:mm:ss.SSS yy GGGG"}, + {null, " 12 BC 12 Jan 01:23:45.678 ", " yy GG dd MMM HH:mm:ss.SSS "}, + {null, "12 Jan 01:23:45.678 12 Anno Domini", "dd MMM HH:mm:ss.SSS yy GGGG"}, {null, "Monday", "EEEE"}, {null, " Monday ", " EEEE "}, {null, "Monday ", "EEEE "}, @@ -95,7 +95,7 @@ public static Collection data() { : new Object[]{ Locale.GERMAN, "Mo", "EEE"} , -// {null, "Monday 17th September 1999 BC at 12:34:56.000 AM", "EEEE '17th' MMMM yyyy GG 'at' h:mm:ss.SSS a"}, + {null, "Tuesday 31st December 2024 AD at 12:34:56.000 AM", "EEEE dd MMMM yyyy GG 'at' h:mm:ss.SSS a"}, {null, "2021-1-2", "yyyy-M-d"}, {null, "2021-01-2", "yyyy-MM-d"}, {null, "2021-1-02", "yyyy-M-dd"}, @@ -109,13 +109,17 @@ public static Collection data() { {null, "1/1/23 12:34 ", "M/d/yy HH:mm "}, {null, "1/1/2023 12:34 ", "M/d/yyyy HH:mm "}, {null, "01/01/23 12:34 ", "MM/dd/yy HH:mm "}, - + {null, "11 31st of december 1996 ", "'11' dd 'of' MMMM yyyy "}, + {null, "december.31st", "MMMM.dd"}, + {null, " 11 december 11", " dd MMMM '11'"}, // incorrect + {null, " 11 december, 11 ", " dd MMMM, '11' "}, // incorrect, comma should have less priority + {null, " 11, december 11 ", " dd, MMMM '11' "}, // incorrect, comma should have less priority }); } public FuzzyDateParserParametrizedTest(Locale locale, String input, String expectedPattern) { this.locale = locale == null ? Locale.ENGLISH : locale; - this.input = input; + this.input = FuzzyDateParser.removeSequentialSuffixes(input); this.expectedPattern = expectedPattern; } diff --git a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserTest.java b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserTest.java index 8da62066..4ad05dff 100644 --- a/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserTest.java +++ b/src/test/java/liqp/filters/date/fuzzy/FuzzyDateParserTest.java @@ -14,7 +14,7 @@ public class FuzzyDateParserTest { @Test public void testTimeRegexp() { PartExtractor partExtractor = Extractors.regularTimeExtractor.get(null); - PartExtractorResult result = partExtractor.extract(" 12:34 "); + PartExtractorResult result = partExtractor.extract(" 12:34 ", null, -1); assertTrue(result.found); assertEquals( 1, result.start); assertEquals( 6, result.end); diff --git a/src/test/java/liqp/filters/date/fuzzy/StandardsTest.java b/src/test/java/liqp/filters/date/fuzzy/StandardsTest.java new file mode 100644 index 00000000..debe787e --- /dev/null +++ b/src/test/java/liqp/filters/date/fuzzy/StandardsTest.java @@ -0,0 +1,164 @@ +package liqp.filters.date.fuzzy; + +import static org.junit.Assert.assertEquals; + +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import org.junit.Test; + +public class StandardsTest { + + /** + * RFC 822 (superseded by RFC 1123 because of Y2K) + * Full or abbreviated weekday name + * Day must be 2 digits + * Month must be 3-letter abbreviation + * 2-digit year (YY) or 4-digit year (YYYY) + * Hour must have a leading zero, still it may be without + * Time and timezone are required + * Timezone offset in ±hhmm or abbreviated names (e.g., GMT, EST, PST). + */ + @Test + public void testRFC822() { + String[] samples = { + "Sun, 06 Nov 1994 08:49:37 GMT", + "Sun, 06 Nov 94 08:49:37 GMT", + "Sun, 6 Nov 1994 08:49:37 GMT", + "Sun, 6 Nov 94 08:49:37 GMT", + "Sun, 06 Nov 1994 8:49:37 GMT", + "Sun, 06 Nov 94 8:49:37 GMT", + "Sun, 6 Nov 1994 8:49:37 GMT", + "Sun, 6 Nov 94 8:49:37 GMT", + "Sun, 06 Nov 1994 08:49 GMT", + "Sun, 06 Nov 94 08:49 GMT", + "Sun, 6 Nov 1994 08:49 GMT", + "Sun, 6 Nov 94 08:49 GMT", + "Sun, 06 Nov 1994 8:49 GMT", + "Sun, 06 Nov 94 8:49 GMT", + "Sun, 6 Nov 1994 8:49 GMT", + "Sun, 6 Nov 94 8:49 GMT", + "Sun, 06 Nov 1994", + "Sun, 06 Nov 94", + "Sun, 6 Nov 1994", + "Sun, 6 Nov 94" + }; + + FuzzyDateParser parser = new FuzzyDateParser(); + for (String sample : samples) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC), datetime); + } + } + + /** + * RFC 1123 (an update to RFC 822 for Internet date/time) + * Same as RFC822 but with 4-digit year + * Enforces a strict 2-digit day + * Timezone must always be GMT (or UTC) + */ + @Test + public void testRFC1123() { + String[] samples = { + "Sun, 06 Nov 1994 08:49:37 GMT", + "Sun, 6 Nov 1994 08:49:37 GMT", + "Sun, 06 Nov 1994 8:49:37 GMT", + "Sun, 6 Nov 1994 8:49:37 GMT", + "Sun, 06 Nov 1994 08:49 GMT", + "Sun, 6 Nov 1994 08:49 GMT", + "Sun, 06 Nov 1994 8:49 GMT", + "Sun, 6 Nov 1994 8:49 GMT", + "Sun, 06 Nov 1994", + "Sun, 6 Nov 1994", + }; + + FuzzyDateParser parser = new FuzzyDateParser(); + for (String sample : samples) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC), datetime); + } + } + + /** + * RFC 2822 (email standard update) + * Same as RFC822 but with 4-digit year + * Day must be 2 digits + * Hours and minutes must be 2 digits + * Seconds are optional + * Timezone required and must be ±hhmm + */ + @Test + public void testRFC2822() { + String[] samples = { + "Sun, 06 Nov 1994 08:49:37 +0000", + "Sun, 6 Nov 1994 08:49:37 +0000", + "Sun, 06 Nov 1994 8:49:37 +0000", + "Sun, 6 Nov 1994 8:49:37 +0000", + "Sun, 06 Nov 1994 08:49 +0000", + "Sun, 6 Nov 1994 08:49 +0000", + "Sun, 06 Nov 1994 8:49 +0000", + "Sun, 6 Nov 1994 8:49 +0000", + "Sun, 06 Nov 1994 +0000", + "Sun, 6 Nov 1994 +0000", + }; + + FuzzyDateParser parser = new FuzzyDateParser(); + for (String sample : samples) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC), datetime); + } + } + + /** + * RFC 3339 (ISO 8601 profile for Internet) + * Example: 1997-11-21T09:55:06Z + * Z is the timezone offset and is optional + * Fractional seconds are optional + * Instead of 'T' the space is allowed as separator (ISO 8601 requires 'T') + * Separators are mandatory (ISO 8601 allows omission: 20201209T160953Z) + * No ordinal dates allowed (ISO 8601 allows 2020-344) + * No week dates allowed (ISO 8601 allows 2020-W52-3) + * negative zero timezone offset allowed '-00:00' (same in ISO 8601) + * Timezone offset must be 'Z' or ±hh:mm + */ + @Test + public void testRFC3339() { + String[] samples = { + "1994-11-06T08:49:37.123Z", + "1994-11-06T08:49:37Z", + "1994-11-06 08:49:37Z", + "1994-11-06T08:49:37+0000", + "1994-11-06T08:49:37+00", + "1994-11-06T08:49:37+00:00", + "1994-11-06T08:49:37", + "1994-11-06T08:49", + "1994-11-06T08", + "1994-11-06", + }; + + FuzzyDateParser parser = new FuzzyDateParser(); + for (String sample : samples) { + ZonedDateTime datetime = parser.parse(sample, null, ZoneOffset.UTC); + assertEquals("wrong sample:[" + sample + "]", ZonedDateTime.of(1994, 11, 6, 8, 49, 37, 0, ZoneOffset.UTC), datetime); + } + } + + @Test + public void testISO8601() { + String[] samples = { + "1994-11-06T08:49:37.123Z", + "1994-11-06T08:49:37Z", + "1994-11-06T08:49:37+00:00", + "1994-11-06T08:49:37+0000", + "1994-11-06T08:49:37+00", + "1994-11-06T08:49:37", + "1994-11-06T08:49", + "1994-11-06T08", + "1994-11-06", + }; + } + + + // more: + // https://chatgpt.com/g/g-p-6762180fb09c81919a7acd1088c93256-liqp/c/676888d1-5bfc-8003-9854-8e9033aa608a + +}