Skip to content

Commit

Permalink
#178 - week-number based date extraction patterns for titles
Browse files Browse the repository at this point in the history
- more unit tests
#191 - added two obvious date formats yyyy-mm-dd and yyyy-dd-mm
  • Loading branch information
SebastianMC committed Jan 14, 2025
1 parent 975f6ee commit b142d19
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 13 deletions.
8 changes: 7 additions & 1 deletion src/custom-sort/matchers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@ export const NumberRegexStr: string = ' *(\\d+)'; // Plain number
export const CompoundNumberDotRegexStr: string = ' *(\\d+(?:\\.\\d+)*)'; // Compound number with dot as separator
export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compound number with dash as separator

// Purely numeric dash-separated dates: optional leading spaces, then group 1 captures 4 digits, 2 digits, 2 digits.
export const Date_yyyy_mm_dd_RegexStr: string = ' *(\\d{4}-\\d{2}-\\d{2})'
// yyyy-dd-mm has exactly the same textual shape, so the same regex is reused; telling day from month is not done by the regex.
export const Date_yyyy_dd_mm_RegexStr: string = Date_yyyy_mm_dd_RegexStr

// Dates with an English three-letter month name; group 1 captures the whole date, leading spaces are tolerated.
// NOTE(review): `[0-3]*` means "zero or more", so the day part also accepts digit runs such as 0001 or 331;
// `[0-3]?` may have been intended - confirm against existing users before tightening.
export const Date_dd_Mmm_yyyy_RegexStr: string = ' *([0-3]*[0-9]-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\\d{4})'; // Date like 01-Jan-2020
export const Date_Mmm_dd_yyyy_RegexStr: string = ' *((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3]*[0-9]-\\d{4})'; // Date like Jan-01-2020

// Week-based date patterns. Each tolerates leading spaces and captures the whole date text in group 1.
export const Date_yyyy_Www_mm_dd_RegexStr: string = ' *(\\d{4}-W\\d{1,2} \\(\\d{2}-\\d{2}\\))' // Like 2021-W51 (12-17)
export const Date_yyyy_WwwISO_RegexStr: string = ' *(\\d{4}-W\\d{1,2})' // Like 2021-W51
// Declared exactly once, as an alias: the text shape is identical to the ISO variant, so one regex
// serves both. (A second, literal declaration of this name would be a redeclaration error.)
export const Date_yyyy_Www_RegexStr: string = Date_yyyy_WwwISO_RegexStr

// Separator characters for compound numbers (the dot and dash forms matched by the CompoundNumber* regexes).
export const DOT_SEPARATOR = '.'
export const DASH_SEPARATOR = '-'
Expand Down Expand Up @@ -128,6 +132,8 @@ export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: nu
}
}

// Pre-built normalizers for dash-separated dates, produced by getNormalizedDate_NormalizerFn_for.
// The first argument is the separator and the second is dayIdx, the position of the day within the
// separated parts. The following two numbers are presumably the month and year positions - TODO confirm
// against the factory's signature. MONTHS is passed only for the formats with a month name
// (presumably to translate names like Jan into a number - verify in the factory).
export const getNormalizedDate_yyyy_mm_dd_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 2, 1, 0)
export const getNormalizedDate_yyyy_dd_mm_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 2, 0)
export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS)
export const getNormalizedDate_Mmm_dd_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 0, 2, MONTHS)

Expand Down
24 changes: 22 additions & 2 deletions src/custom-sort/sorting-spec-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import {
DOT_SEPARATOR,
getNormalizedDate_dd_Mmm_yyyy_NormalizerFn,
getNormalizedDate_Mmm_dd_yyyy_NormalizerFn,
getNormalizedDate_yyyy_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_dd_mm_NormalizerFn,
getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_WwwISO_NormalizerFn,
getNormalizedDate_yyyy_Www_NormalizerFn,
Expand All @@ -32,7 +34,7 @@ import {
NumberRegexStr,
RomanNumberRegexStr,
WordInAnyLanguageRegexStr,
WordInASCIIRegexStr
WordInASCIIRegexStr, Date_yyyy_WwwISO_RegexStr, Date_yyyy_mm_dd_RegexStr, Date_yyyy_dd_mm_RegexStr
} from "./matchers";
import {
FolderWildcardMatching,
Expand Down Expand Up @@ -357,6 +359,8 @@ const InlineRegexSymbol_Digit1: string = '\\d'
const InlineRegexSymbol_Digit2: string = '\\[0-9]'
const InlineRegexSymbol_0_to_3: string = '\\[0-3]'

// Date-pattern symbols as they are written in a sorting specification: a backslash followed by the
// bracketed format name, e.g. `\[yyyy-mm-dd]`. The letter case inside the brackets is significant.
const Date_yyyy_mm_dd_RegexSymbol: string = '\\[yyyy-mm-dd]'
const Date_yyyy_dd_mm_RegexSymbol: string = '\\[yyyy-dd-mm]'
const Date_dd_Mmm_yyyy_RegexSymbol: string = '\\[dd-Mmm-yyyy]'
const Date_Mmm_dd_yyyy_RegexSymbol: string = '\\[Mmm-dd-yyyy]'
const Date_yyyy_Www_mm_dd_RegexSymbol: string = '\\[yyyy-Www (mm-dd)]'
Expand All @@ -381,6 +385,8 @@ const sortingSymbolsArr: Array<string> = [
escapeRegexUnsafeCharacters(CompoundRomanNumberDashRegexSymbol),
escapeRegexUnsafeCharacters(WordInASCIIRegexSymbol),
escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_mm_dd_RegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_dd_mm_RegexSymbol),
escapeRegexUnsafeCharacters(Date_dd_Mmm_yyyy_RegexSymbol),
escapeRegexUnsafeCharacters(Date_Mmm_dd_yyyy_RegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_Www_mm_dd_RegexSymbol),
Expand Down Expand Up @@ -453,6 +459,8 @@ export const CompoundDashRomanNumberNormalizerFn: NormalizerFn = (s: string) =>
// NormalizerFn adapters for numbers: each forwards the matched text to getNormalizedNumber,
// adding the compound-number separator where one applies.
export const NumberNormalizerFn: NormalizerFn = (matched: string) => {
    return getNormalizedNumber(matched)
}
export const CompoundDotNumberNormalizerFn: NormalizerFn = (matched: string) => {
    return getNormalizedNumber(matched, DOT_SEPARATOR)
}
export const CompoundDashNumberNormalizerFn: NormalizerFn = (matched: string) => {
    return getNormalizedNumber(matched, DASH_SEPARATOR)
}

// NormalizerFn adapters for dates: each forwards the matched text, and nothing else,
// to the date normalizer of the same format.
export const Date_yyyy_mm_dd_NormalizerFn: NormalizerFn = (matched: string) => {
    return getNormalizedDate_yyyy_mm_dd_NormalizerFn(matched)
}
export const Date_yyyy_dd_mm_NormalizerFn: NormalizerFn = (matched: string) => {
    return getNormalizedDate_yyyy_dd_mm_NormalizerFn(matched)
}
export const Date_dd_Mmm_yyyy_NormalizerFn: NormalizerFn = (matched: string) => {
    return getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(matched)
}
export const Date_Mmm_dd_yyyy_NormalizerFn: NormalizerFn = (matched: string) => {
    return getNormalizedDate_Mmm_dd_yyyy_NormalizerFn(matched)
}
export const Date_yyyy_Www_mm_dd_NormalizerFn: NormalizerFn = (matched: string) => {
    return getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(matched)
}
Expand All @@ -469,6 +477,8 @@ export enum AdvancedRegexType {
CompoundDashRomanNumber,
WordInASCII,
WordInAnyLanguage,
Date_yyyy_mm_dd,
Date_yyyy_dd_mm,
Date_dd_Mmm_yyyy,
Date_Mmm_dd_yyyy,
Date_yyyy_Www_mm_dd_yyyy,
Expand Down Expand Up @@ -518,6 +528,16 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
advancedRegexType: AdvancedRegexType.WordInAnyLanguage,
unicodeRegex: true
},
[Date_yyyy_mm_dd_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_mm_dd_RegexStr,
normalizerFn: Date_yyyy_mm_dd_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_mm_dd
},
[Date_yyyy_dd_mm_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_dd_mm_RegexStr,
normalizerFn: Date_yyyy_dd_mm_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_dd_mm
},
[Date_dd_Mmm_yyyy_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_dd_Mmm_yyyy_RegexStr,
normalizerFn: Date_dd_Mmm_yyyy_NormalizerFn,
Expand All @@ -534,7 +554,7 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
advancedRegexType: AdvancedRegexType.Date_yyyy_Www_mm_dd_yyyy
},
[Date_yyyy_WwwISO_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_Www_RegexStr,
regexpStr: Date_yyyy_WwwISO_RegexStr,
normalizerFn: Date_yyyy_WwwISO_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_WwwISO
},
Expand Down
84 changes: 75 additions & 9 deletions src/test/int/dates-in-names.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ import {
DEFAULT_FOLDER_CTIME,
determineFolderDatesIfNeeded,
determineSortingGroup,
FolderItemForSorting, OS_alphabetical, OS_byCreatedTime, ProcessingContext, sortFolderItems
FolderItemForSorting,
OS_alphabetical,
OS_byCreatedTime,
ProcessingContext,
sortFolderItems
} from "../../custom-sort/custom-sort";
import {
CustomSortGroupType,
Expand All @@ -29,11 +33,12 @@ import {
} from "../../custom-sort/sorting-spec-processor";

describe('sortFolderItems', () => {
it('should correctly handle Mmm-dd-yyyy pattern in file names', () => {
it('should correctly handle Mmm-dd-yyyy pattern in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt =
` ... \\[Mmm-dd-yyyy]
`
... \\[Mmm-dd-yyyy]
> a-z
`
const PARENT_PATH = 'parent/folder/path'
Expand All @@ -60,11 +65,12 @@ describe('sortFolderItems', () => {
'AAA Jan-01-2012'
])
})
it('should correctly handle yyyy-Www (mm-dd) pattern in file names', () => {
it('should correctly handle yyyy-Www (mm-dd) pattern in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt =
` ... \\[yyyy-Www (mm-dd)]
`
... \\[yyyy-Www (mm-dd)]
< a-z
------
`
Expand Down Expand Up @@ -94,11 +100,12 @@ describe('sortFolderItems', () => {
"------.md"
])
})
it('should correctly handle yyyy-WwwISO pattern in file names', () => {
it('should correctly handle yyyy-WwwISO pattern in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt =
` /+ ... \\[yyyy-Www (mm-dd)]
`
/+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-WwwISO]
< a-z
`
Expand Down Expand Up @@ -132,12 +139,58 @@ describe('sortFolderItems', () => {
"------.md"
])
})
it('should correctly handle yyyy-Www pattern in file names', () => {
// Sorts the mocked folder of date/week-named children with a spec that uses both the
// `\[yyyy-Www (mm-dd)]` and the `\[yyyy-Www]` symbols, then checks the resulting order of names.
it('should correctly handle yyyy-Www pattern in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
// In the template literal `\\` yields a single backslash, so the spec lines read e.g. `/+ ... \[yyyy-Www]`.
const sortSpecTxt =
`
/+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-Www]
> a-z
... \\-d+
`
const PARENT_PATH = 'parent/folder/path'
const sortSpecsCollection = processor.parseSortSpecFromText(
sortSpecTxt.split('\n'),
PARENT_PATH,
'file name with the sorting, irrelevant here'
)

const folder: TFolder = mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest(PARENT_PATH)
// The non-null assertions encode the test's assumption that the spec parsed and produced an entry for PARENT_PATH.
const sortSpec: CustomSortSpec = sortSpecsCollection?.sortSpecByPath![PARENT_PATH]!

const ctx: ProcessingContext = {}

// when
const result: Array<TAbstractFile> = sortFolderItems(folder, folder.children, sortSpec, ctx, OS_alphabetical)

// then
// U.S. standard of weeks numbering
// Expected order runs from the latest week (2021-W53) down to the earliest (2021-W1), with '------.md' last.
const orderedNames = result.map(f => f.name)
expect(orderedNames).toEqual([
'FFF1 ISO:2022-01-03 US:2021-12-27 2021-W53.md',
'FFF2 ISO:2021-12-27 US:2021-12-20 2021-W52.md',
'C 2021-W51 (12-17).md',
'D ISO:2021-12-20 US:2021-12-13 2021-W51.md',
'A 2021-W10 (03-05).md',
'B ISO:2021-03-08 US:2021-03-01 2021-W10',
'E 2021-W1 (01-01)',
'F ISO:2021-01-04 US:2020-12-28 2021-W1',
"------.md"
])
})
it('should correctly mix for sorting different date formats in file and folder names', () => {
// given
const processor: SortingSpecProcessor = new SortingSpecProcessor()
const sortSpecTxt =
` /+ ... \\[yyyy-Www (mm-dd)]
`
/+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-Www]
/+ ... mm-dd \\[yyyy-mm-dd]
/+ ... dd-mm \\[yyyy-dd-mm]
/+ ... \\[yyyy-mm-dd]
/+ ... \\[Mmm-dd-yyyy]
/+ \\[dd-Mmm-yyyy] ...
> a-z
`
const PARENT_PATH = 'parent/folder/path'
Expand All @@ -148,6 +201,14 @@ describe('sortFolderItems', () => {
)

const folder: TFolder = mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest(PARENT_PATH)
folder.children.push(...[
mockTFile('File 2021-12-14', 'md'),
mockTFile('File mm-dd 2020-12-30', 'md'), // mm-dd
mockTFile('File dd-mm 2020-31-12', 'md'), // dd-mm
mockTFile('File Mar-08-2021', 'md'),
mockTFile('18-Dec-2021 file', 'md'),
])

const sortSpec: CustomSortSpec = sortSpecsCollection?.sortSpecByPath![PARENT_PATH]!

const ctx: ProcessingContext = {}
Expand All @@ -161,11 +222,16 @@ describe('sortFolderItems', () => {
expect(orderedNames).toEqual([
'FFF1 ISO:2022-01-03 US:2021-12-27 2021-W53.md',
'FFF2 ISO:2021-12-27 US:2021-12-20 2021-W52.md',
"18-Dec-2021 file.md",
'C 2021-W51 (12-17).md',
"File 2021-12-14.md",
'D ISO:2021-12-20 US:2021-12-13 2021-W51.md',
"File Mar-08-2021.md",
'A 2021-W10 (03-05).md',
'B ISO:2021-03-08 US:2021-03-01 2021-W10',
'E 2021-W1 (01-01)',
"File dd-mm 2020-31-12.md",
"File mm-dd 2020-12-30.md",
'F ISO:2021-01-04 US:2020-12-28 2021-W1',
"------.md"
])
Expand Down
29 changes: 28 additions & 1 deletion src/test/unit/matchers.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@ import {
CompoundRomanNumberDotRegexStr,
CompoundRomanNumberDashRegexStr,
WordInASCIIRegexStr,
WordInAnyLanguageRegexStr, getNormalizedDate_dd_Mmm_yyyy_NormalizerFn
WordInAnyLanguageRegexStr,
getNormalizedDate_dd_Mmm_yyyy_NormalizerFn,
getNormalizedDate_yyyy_Www_NormalizerFn,
getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_dd_mm_NormalizerFn,
getNormalizedDate_yyyy_mm_dd_NormalizerFn
} from "../../custom-sort/matchers";

describe('Plain numbers regexp', () => {
Expand Down Expand Up @@ -431,3 +436,25 @@ describe('getNormalizedDate_dd_Mmm_yyyy_NormalizerFn', () => {
expect(getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s)).toBe(out)
})
})

// Runs the same inputs through both numeric-date normalizers: the yyyy-dd-mm one is expected to swap
// the last two parts, the yyyy-mm-dd one to keep them in place. The last row shows that over-long
// parts ('1234') are passed through as they are.
describe('getNormalizedDate_yyyy_dd_mm_NormalizerFn', () => {
    // Row layout: [input, expected from the yyyy-dd-mm normalizer, expected from the yyyy-mm-dd normalizer]
    const cases = [
        ['2012-13-01', '2012-01-13//', '2012-13-01//'],
        ['0001-03-02', '0001-02-03//', '0001-03-02//'],
        ['7777-09-1234', '7777-1234-09//', '7777-09-1234//'],
    ]
    it.each(cases)('>%s< should become %s', (input: string, expectedDayFirst: string, expectedMonthFirst: string) => {
        const readAsDayFirst = getNormalizedDate_yyyy_dd_mm_NormalizerFn(input)
        expect(readAsDayFirst).toBe(expectedDayFirst)

        const readAsMonthFirst = getNormalizedDate_yyyy_mm_dd_NormalizerFn(input)
        expect(readAsMonthFirst).toBe(expectedMonthFirst)
    })
})

// For 'yyyy-Www (mm-dd)' input the expected output keeps the year and the parenthesised mm-dd
// and leaves the week number out.
describe('getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn', () => {
    // Row layout: [input, expected normalized value]
    const cases = [
        ['2012-W0 (01-13)', '2012-01-13//'],
        ['0002-W12 (02-03)', '0002-02-03//'],
    ]
    it.each(cases)('>%s< should become %s', (input: string, expected: string) => {
        const normalized = getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(input)
        expect(normalized).toBe(expected)
    })
})

0 comments on commit b142d19

Please sign in to comment.