From b142d1951e2300ab91ab10f8edb5c41a839c49c1 Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Tue, 14 Jan 2025 17:37:02 +0100 Subject: [PATCH] #178 - week-number based date extraction patterns for titles - more unit tests #191 - added two obvious date formats yyyy-mm-dd and yyyy-dd-mm --- src/custom-sort/matchers.ts | 8 ++- src/custom-sort/sorting-spec-processor.ts | 24 ++++++- src/test/int/dates-in-names.int.test.ts | 84 ++++++++++++++++++++--- src/test/unit/matchers.spec.ts | 29 +++++++- 4 files changed, 132 insertions(+), 13 deletions(-) diff --git a/src/custom-sort/matchers.ts b/src/custom-sort/matchers.ts index b28543a46..7d604d798 100644 --- a/src/custom-sort/matchers.ts +++ b/src/custom-sort/matchers.ts @@ -10,11 +10,15 @@ export const NumberRegexStr: string = ' *(\\d+)'; // Plain number export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; // Compound number with dot as separator export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compound number with dash as separator +export const Date_yyyy_mm_dd_RegexStr: string = ' *(\\d{4}-\\d{2}-\\d{2})' +export const Date_yyyy_dd_mm_RegexStr: string = Date_yyyy_mm_dd_RegexStr + export const Date_dd_Mmm_yyyy_RegexStr: string = ' *([0-3]*[0-9]-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\\d{4})'; // Date like 01-Jan-2020 export const Date_Mmm_dd_yyyy_RegexStr: string = ' *((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3]*[0-9]-\\d{4})'; // Date like Jan-01-2020 export const Date_yyyy_Www_mm_dd_RegexStr: string = ' *(\\d{4}-W\\d{1,2} \\(\\d{2}-\\d{2}\\))' -export const Date_yyyy_Www_RegexStr: string = ' *(\\d{4}-W\\d{1,2})' +export const Date_yyyy_WwwISO_RegexStr: string = ' *(\\d{4}-W\\d{1,2})' +export const Date_yyyy_Www_RegexStr: string = Date_yyyy_WwwISO_RegexStr export const DOT_SEPARATOR = '.' export const DASH_SEPARATOR = '-' @@ -128,6 +132,8 @@ export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: nu } } +export const getNormalizedDate_yyyy_mm_dd_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 2, 1, 0) +export const getNormalizedDate_yyyy_dd_mm_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 2, 0) export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS) export const getNormalizedDate_Mmm_dd_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 0, 2, MONTHS) diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 2d9760732..04c976ebd 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -24,6 +24,8 @@ import { DOT_SEPARATOR, getNormalizedDate_dd_Mmm_yyyy_NormalizerFn, getNormalizedDate_Mmm_dd_yyyy_NormalizerFn, + getNormalizedDate_yyyy_mm_dd_NormalizerFn, + getNormalizedDate_yyyy_dd_mm_NormalizerFn, getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn, getNormalizedDate_yyyy_WwwISO_NormalizerFn, getNormalizedDate_yyyy_Www_NormalizerFn, @@ -32,7 +34,7 @@ import { NumberRegexStr, RomanNumberRegexStr, WordInAnyLanguageRegexStr, - WordInASCIIRegexStr + WordInASCIIRegexStr, Date_yyyy_WwwISO_RegexStr, Date_yyyy_mm_dd_RegexStr, Date_yyyy_dd_mm_RegexStr } from "./matchers"; import { FolderWildcardMatching, @@ -357,6 +359,8 @@ const InlineRegexSymbol_Digit1: string = '\\d' const InlineRegexSymbol_Digit2: string = '\\[0-9]' const InlineRegexSymbol_0_to_3: string = '\\[0-3]' +const Date_yyyy_mm_dd_RegexSymbol: string = '\\[yyyy-mm-dd]' +const Date_yyyy_dd_mm_RegexSymbol: string = '\\[yyyy-dd-mm]' const Date_dd_Mmm_yyyy_RegexSymbol: string = '\\[dd-Mmm-yyyy]' const Date_Mmm_dd_yyyy_RegexSymbol: string = '\\[Mmm-dd-yyyy]' const Date_yyyy_Www_mm_dd_RegexSymbol: string = '\\[yyyy-Www (mm-dd)]' @@ -381,6 +385,8 @@ const sortingSymbolsArr: Array = [ escapeRegexUnsafeCharacters(CompoundRomanNumberDashRegexSymbol), escapeRegexUnsafeCharacters(WordInASCIIRegexSymbol), escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol), + escapeRegexUnsafeCharacters(Date_yyyy_mm_dd_RegexSymbol), + escapeRegexUnsafeCharacters(Date_yyyy_dd_mm_RegexSymbol), escapeRegexUnsafeCharacters(Date_dd_Mmm_yyyy_RegexSymbol), escapeRegexUnsafeCharacters(Date_Mmm_dd_yyyy_RegexSymbol), escapeRegexUnsafeCharacters(Date_yyyy_Www_mm_dd_RegexSymbol), @@ -453,6 +459,8 @@ export const CompoundDashRomanNumberNormalizerFn: NormalizerFn = (s: string) => export const NumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s) export const CompoundDotNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DOT_SEPARATOR) export const CompoundDashNumberNormalizerFn: NormalizerFn = (s: string) => getNormalizedNumber(s, DASH_SEPARATOR) +export const Date_yyyy_mm_dd_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_mm_dd_NormalizerFn(s) +export const Date_yyyy_dd_mm_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_dd_mm_NormalizerFn(s) export const Date_dd_Mmm_yyyy_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s) export const Date_Mmm_dd_yyyy_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_Mmm_dd_yyyy_NormalizerFn(s) export const Date_yyyy_Www_mm_dd_NormalizerFn: NormalizerFn = (s: string) => getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(s) @@ -469,6 +477,8 @@ export enum AdvancedRegexType { CompoundDashRomanNumber, WordInASCII, WordInAnyLanguage, + Date_yyyy_mm_dd, + Date_yyyy_dd_mm, Date_dd_Mmm_yyyy, Date_Mmm_dd_yyyy, Date_yyyy_Www_mm_dd_yyyy, @@ -518,6 +528,16 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = { advancedRegexType: AdvancedRegexType.WordInAnyLanguage, unicodeRegex: true }, + [Date_yyyy_mm_dd_RegexSymbol]: { // Intentionally retain character case + regexpStr: Date_yyyy_mm_dd_RegexStr, + normalizerFn: Date_yyyy_mm_dd_NormalizerFn, + advancedRegexType: AdvancedRegexType.Date_yyyy_mm_dd + }, + [Date_yyyy_dd_mm_RegexSymbol]: { // Intentionally retain character case + regexpStr: Date_yyyy_dd_mm_RegexStr, + normalizerFn: Date_yyyy_dd_mm_NormalizerFn, + advancedRegexType: AdvancedRegexType.Date_yyyy_dd_mm + }, [Date_dd_Mmm_yyyy_RegexSymbol]: { // Intentionally retain character case regexpStr: Date_dd_Mmm_yyyy_RegexStr, normalizerFn: Date_dd_Mmm_yyyy_NormalizerFn, @@ -534,7 +554,7 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = { advancedRegexType: AdvancedRegexType.Date_yyyy_Www_mm_dd_yyyy }, [Date_yyyy_WwwISO_RegexSymbol]: { // Intentionally retain character case - regexpStr: Date_yyyy_Www_RegexStr, + regexpStr: Date_yyyy_WwwISO_RegexStr, normalizerFn: Date_yyyy_WwwISO_NormalizerFn, advancedRegexType: AdvancedRegexType.Date_yyyy_WwwISO }, diff --git a/src/test/int/dates-in-names.int.test.ts b/src/test/int/dates-in-names.int.test.ts index 8d4c74081..341dd852d 100644 --- a/src/test/int/dates-in-names.int.test.ts +++ b/src/test/int/dates-in-names.int.test.ts @@ -7,7 +7,11 @@ import { DEFAULT_FOLDER_CTIME, determineFolderDatesIfNeeded, determineSortingGroup, - FolderItemForSorting, OS_alphabetical, OS_byCreatedTime, ProcessingContext, sortFolderItems + FolderItemForSorting, + OS_alphabetical, + OS_byCreatedTime, + ProcessingContext, + sortFolderItems } from "../../custom-sort/custom-sort"; import { CustomSortGroupType, @@ -29,11 +33,12 @@ import { } from "../../custom-sort/sorting-spec-processor"; describe('sortFolderItems', () => { - it('should correctly handle Mmm-dd-yyyy pattern in file names', () => { + it('should correctly handle Mmm-dd-yyyy pattern in file and folder names', () => { // given const processor: SortingSpecProcessor = new SortingSpecProcessor() const sortSpecTxt = -` ... \\[Mmm-dd-yyyy] +` + ... \\[Mmm-dd-yyyy] > a-z ` const PARENT_PATH = 'parent/folder/path' @@ -60,11 +65,12 @@ describe('sortFolderItems', () => { 'AAA Jan-01-2012' ]) }) - it('should correctly handle yyyy-Www (mm-dd) pattern in file names', () => { + it('should correctly handle yyyy-Www (mm-dd) pattern in file and folder names', () => { // given const processor: SortingSpecProcessor = new SortingSpecProcessor() const sortSpecTxt = -` ... \\[yyyy-Www (mm-dd)] +` + ... \\[yyyy-Www (mm-dd)] < a-z ------ ` @@ -94,11 +100,12 @@ describe('sortFolderItems', () => { "------.md" ]) }) - it('should correctly handle yyyy-WwwISO pattern in file names', () => { + it('should correctly handle yyyy-WwwISO pattern in file and folder names', () => { // given const processor: SortingSpecProcessor = new SortingSpecProcessor() const sortSpecTxt = -` /+ ... \\[yyyy-Www (mm-dd)] +` + /+ ... \\[yyyy-Www (mm-dd)] /+ ... \\[yyyy-WwwISO] < a-z ` @@ -132,12 +139,58 @@ describe('sortFolderItems', () => { "------.md" ]) }) - it('should correctly handle yyyy-Www pattern in file names', () => { + it('should correctly handle yyyy-Www pattern in file and folder names', () => { + // given + const processor: SortingSpecProcessor = new SortingSpecProcessor() + const sortSpecTxt = +` + /+ ... \\[yyyy-Www (mm-dd)] + /+ ... \\[yyyy-Www] + > a-z + ... \\-d+ +` + const PARENT_PATH = 'parent/folder/path' + const sortSpecsCollection = processor.parseSortSpecFromText( + sortSpecTxt.split('\n'), + PARENT_PATH, + 'file name with the sorting, irrelevant here' + ) + + const folder: TFolder = mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest(PARENT_PATH) + const sortSpec: CustomSortSpec = sortSpecsCollection?.sortSpecByPath![PARENT_PATH]! + + const ctx: ProcessingContext = {} + + // when + const result: Array = sortFolderItems(folder, folder.children, sortSpec, ctx, OS_alphabetical) + + // then + // U.S. standard of weeks numbering + const orderedNames = result.map(f => f.name) + expect(orderedNames).toEqual([ + 'FFF1 ISO:2022-01-03 US:2021-12-27 2021-W53.md', + 'FFF2 ISO:2021-12-27 US:2021-12-20 2021-W52.md', + 'C 2021-W51 (12-17).md', + 'D ISO:2021-12-20 US:2021-12-13 2021-W51.md', + 'A 2021-W10 (03-05).md', + 'B ISO:2021-03-08 US:2021-03-01 2021-W10', + 'E 2021-W1 (01-01)', + 'F ISO:2021-01-04 US:2020-12-28 2021-W1', + "------.md" + ]) + }) + it('should correctly mix for sorting different date formats in file and folder names', () => { // given const processor: SortingSpecProcessor = new SortingSpecProcessor() const sortSpecTxt = -` /+ ... \\[yyyy-Www (mm-dd)] +` + /+ ... \\[yyyy-Www (mm-dd)] /+ ... \\[yyyy-Www] + /+ ... mm-dd \\[yyyy-mm-dd] + /+ ... dd-mm \\[yyyy-dd-mm] + /+ ... \\[yyyy-mm-dd] + /+ ... \\[Mmm-dd-yyyy] + /+ \\[dd-Mmm-yyyy] ... > a-z ` const PARENT_PATH = 'parent/folder/path' @@ -148,6 +201,14 @@ describe('sortFolderItems', () => { ) const folder: TFolder = mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest(PARENT_PATH) + folder.children.push(...[ + mockTFile('File 2021-12-14', 'md'), + mockTFile('File mm-dd 2020-12-30', 'md'), // mm-dd + mockTFile('File dd-mm 2020-31-12', 'md'), // dd-mm + mockTFile('File Mar-08-2021', 'md'), + mockTFile('18-Dec-2021 file', 'md'), + ]) + const sortSpec: CustomSortSpec = sortSpecsCollection?.sortSpecByPath![PARENT_PATH]! const ctx: ProcessingContext = {} @@ -161,11 +222,16 @@ describe('sortFolderItems', () => { expect(orderedNames).toEqual([ 'FFF1 ISO:2022-01-03 US:2021-12-27 2021-W53.md', 'FFF2 ISO:2021-12-27 US:2021-12-20 2021-W52.md', + "18-Dec-2021 file.md", 'C 2021-W51 (12-17).md', + "File 2021-12-14.md", 'D ISO:2021-12-20 US:2021-12-13 2021-W51.md', + "File Mar-08-2021.md", 'A 2021-W10 (03-05).md', 'B ISO:2021-03-08 US:2021-03-01 2021-W10', 'E 2021-W1 (01-01)', + "File dd-mm 2020-31-12.md", + "File mm-dd 2020-12-30.md", 'F ISO:2021-01-04 US:2020-12-28 2021-W1', "------.md" ]) diff --git a/src/test/unit/matchers.spec.ts b/src/test/unit/matchers.spec.ts index 7515e37c5..0a4b9862f 100644 --- a/src/test/unit/matchers.spec.ts +++ b/src/test/unit/matchers.spec.ts @@ -10,7 +10,12 @@ import { CompoundRomanNumberDotRegexStr, CompoundRomanNumberDashRegexStr, WordInASCIIRegexStr, - WordInAnyLanguageRegexStr, getNormalizedDate_dd_Mmm_yyyy_NormalizerFn + WordInAnyLanguageRegexStr, + getNormalizedDate_dd_Mmm_yyyy_NormalizerFn, + getNormalizedDate_yyyy_Www_NormalizerFn, + getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn, + getNormalizedDate_yyyy_dd_mm_NormalizerFn, + getNormalizedDate_yyyy_mm_dd_NormalizerFn } from "../../custom-sort/matchers"; describe('Plain numbers regexp', () => { @@ -431,3 +436,25 @@ describe('getNormalizedDate_dd_Mmm_yyyy_NormalizerFn', () => { expect(getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)).toBe(out) }) }) + +describe('getNormalizedDate_yyyy_dd_mm_NormalizerFn', () => { + const params = [ + ['2012-13-01', '2012-01-13//', '2012-13-01//'], + ['0001-03-02', '0001-02-03//', '0001-03-02//'], + ['7777-09-1234', '7777-1234-09//', '7777-09-1234//'], + ]; + it.each(params)('>%s< should become %s', (s: string, outForDDMM: string, outForMMDD: string) => { + expect(getNormalizedDate_yyyy_dd_mm_NormalizerFn(s)).toBe(outForDDMM) + expect(getNormalizedDate_yyyy_mm_dd_NormalizerFn(s)).toBe(outForMMDD) + }) +}) + +describe('getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn', () => { + const params = [ + ['2012-W0 (01-13)', '2012-01-13//'], + ['0002-W12 (02-03)', '0002-02-03//'], + ]; + it.each(params)('>%s< should become %s', (s: string, out: string) => { + expect(getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(s)).toBe(out) + }) +})