Skip to content

Commit

Permalink
#178 - week-number based date extraction patterns for titles
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianMC committed Jan 14, 2025
1 parent f9c9c0b commit 975f6ee
Show file tree
Hide file tree
Showing 7 changed files with 377 additions and 18 deletions.
56 changes: 56 additions & 0 deletions src/custom-sort/matchers.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import {
getDateForWeekOfYear
} from "../utils/week-of-year";

// Regex sources (as strings) for Roman numbers. Each one tolerates leading spaces and
// captures the number itself in a single group. Only uppercase Roman digits are matched.
export const RomanNumberRegexStr: string = ' *([MDCLXVI]+)'; // Roman number, e.g. XIV
export const CompoundRomanNumberDotRegexStr: string = ' *([MDCLXVI]+(?:\\.[MDCLXVI]+)*)';// Compound Roman number with dot as separator, e.g. II.IV.X
export const CompoundRomanNumberDashRegexStr: string = ' *([MDCLXVI]+(?:-[MDCLXVI]+)*)'; // Compound Roman number with dash as separator, e.g. II-IV-X
Expand All @@ -9,6 +13,9 @@ export const CompoundNumberDashRegexStr: string = ' *(\\d+(?:-\\d+)*)'; // Compo
// Regex sources (as strings) for dates embedded in titles. Each one tolerates leading spaces
// and captures the whole date in a single group.
export const Date_dd_Mmm_yyyy_RegexStr: string = ' *([0-3]*[0-9]-(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\\d{4})'; // Date like 01-Jan-2020
export const Date_Mmm_dd_yyyy_RegexStr: string = ' *((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-[0-3]*[0-9]-\\d{4})'; // Date like Jan-01-2020

// Week-based dates: four-digit year, literal 'W', one- or two-digit week number
export const Date_yyyy_Www_mm_dd_RegexStr: string = ' *(\\d{4}-W\\d{1,2} \\(\\d{2}-\\d{2}\\))' // Date like 2021-W13 (03-29)
export const Date_yyyy_Www_RegexStr: string = ' *(\\d{4}-W\\d{1,2})' // Date like 2021-W13

// Separators of the parts of compound numbers
export const DOT_SEPARATOR = '.'
export const DASH_SEPARATOR = '-'

Expand Down Expand Up @@ -123,3 +130,52 @@ export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: nu

// Normalizers for dash-separated dates with a month name (01-Jan-2020 and Jan-01-2020).
// The first numeric argument is the index of the day part; the following two are presumably
// the indexes of the month and year parts - confirm against getNormalizedDate_NormalizerFn_for.
export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS)
export const getNormalizedDate_Mmm_dd_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 1, 0, 2, MONTHS)

// Extractors of the individual parts of week-based dates found in titles.
// The capture groups are addressed through the *_IDX constants below.
const DateExtractor_yyyy_Www_mm_dd_Regex = /(\d{4})-W(\d{1,2}) \((\d{2})-(\d{2})\)/ // e.g. 2021-W13 (03-29)
const DateExtractor_yyyy_Www_Regex = /(\d{4})-W(\d{1,2})/ // e.g. 2021-W13

// Matching groups
const YEAR_IDX = 1
const WEEK_IDX = 2
const MONTH_IDX = 3 // present only in DateExtractor_yyyy_Www_mm_dd_Regex
const DAY_IDX = 4 // present only in DateExtractor_yyyy_Www_mm_dd_Regex

/**
 * Creates a normalizer which converts a week-based date found in a title into
 * a `yyyy-mm-dd//` string (each part left-padded by `prependWithZeros`).
 *
 * @param consumeWeek when `true`, the input is expected to contain `yyyy-Www` and the date is
 *   obtained from `getDateForWeekOfYear` for the given year and week number. When `false`, the
 *   input is expected to contain `yyyy-Www (mm-dd)`, the week number is ignored and the explicit
 *   month and day are used as-is.
 * @param weeksISO passed through unchanged to `getDateForWeekOfYear`; only used when
 *   `consumeWeek` is `true`.
 * @returns a function which returns the normalized date, or `null` when its argument does not
 *   contain the expected pattern.
 */
export function getNormalizedDate_NormalizerFn_yyyy_Www_mm_dd(consumeWeek: boolean, weeksISO?: boolean) {
    // Neither regex carries the 'g' flag, so exec() keeps no state between invocations
    const extractor = consumeWeek ? DateExtractor_yyyy_Www_Regex : DateExtractor_yyyy_Www_mm_dd_Regex
    return (s: string): string | null => {
        const matches = extractor.exec(s)
        if (matches === null) {
            // Callers are expected to pass text already matched by the corresponding *_RegexStr;
            // honor the declared nullable result instead of throwing if that is not the case
            return null
        }
        let yearNumber = Number.parseInt(matches[YEAR_IDX], 10)
        let monthNumber: number
        let dayNumber: number
        if (consumeWeek) {
            const weekNumber = Number.parseInt(matches[WEEK_IDX], 10)
            const dateForWeek = getDateForWeekOfYear(yearNumber, weekNumber, weeksISO)
            // Edge weeks can belong to the previous or the next calendar year, so the year is taken
            // from the computed date rather than from the title. This also keeps year, month and day
            // consistent for out-of-range week numbers (e.g. W0 or W60) accepted by the regex.
            yearNumber = dateForWeek.getFullYear()
            monthNumber = dateForWeek.getMonth() + 1 // 1 - 12
            dayNumber = dateForWeek.getDate() // 1 - 31
        } else { // ignore week
            monthNumber = Number.parseInt(matches[MONTH_IDX], 10)
            dayNumber = Number.parseInt(matches[DAY_IDX], 10)
        }
        return `${prependWithZeros(`${yearNumber}`, YEAR_POSITIONS)}-${prependWithZeros(`${monthNumber}`, MONTH_POSITIONS)}-${prependWithZeros(`${dayNumber}`, DAY_POSITIONS)}//`
    }
}

// Ready-to-use normalizers for the week-based date patterns:
// - yyyy-Www (mm-dd): the week number is ignored, the explicit (mm-dd) part supplies month and day
export const getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn = getNormalizedDate_NormalizerFn_yyyy_Www_mm_dd(false)
// - yyyy-WwwISO: the date is derived from the week number with weeksISO = true
export const getNormalizedDate_yyyy_WwwISO_NormalizerFn = getNormalizedDate_NormalizerFn_yyyy_Www_mm_dd(true, true)
// - yyyy-Www: the date is derived from the week number with weeksISO = false
//   (the integration tests refer to this variant as the U.S. week numbering)
export const getNormalizedDate_yyyy_Www_NormalizerFn = getNormalizedDate_NormalizerFn_yyyy_Www_mm_dd(true, false)
36 changes: 34 additions & 2 deletions src/custom-sort/sorting-spec-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@ import {
DASH_SEPARATOR,
Date_dd_Mmm_yyyy_RegexStr,
Date_Mmm_dd_yyyy_RegexStr,
Date_yyyy_Www_mm_dd_RegexStr,
Date_yyyy_Www_RegexStr,
DOT_SEPARATOR,
getNormalizedDate_dd_Mmm_yyyy_NormalizerFn,
getNormalizedDate_Mmm_dd_yyyy_NormalizerFn,
getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn,
getNormalizedDate_yyyy_WwwISO_NormalizerFn,
getNormalizedDate_yyyy_Www_NormalizerFn,
getNormalizedNumber,
getNormalizedRomanNumber,
NumberRegexStr,
Expand Down Expand Up @@ -354,6 +359,9 @@ const InlineRegexSymbol_0_to_3: string = '\\[0-3]'

// Date extraction symbols, spelled the way they appear in a sorting specification
// (a backslash followed by the pattern in square brackets, e.g. \[yyyy-Www])
const Date_dd_Mmm_yyyy_RegexSymbol: string = '\\[dd-Mmm-yyyy]'
const Date_Mmm_dd_yyyy_RegexSymbol: string = '\\[Mmm-dd-yyyy]'
// Week-based variants: year + week number followed by an explicit (mm-dd) date,
// and year + week number alone in two flavours (plain and ISO)
const Date_yyyy_Www_mm_dd_RegexSymbol: string = '\\[yyyy-Www (mm-dd)]'
const Date_yyyy_Www_RegexSymbol: string = '\\[yyyy-Www]'
const Date_yyyy_WwwISO_RegexSymbol: string = '\\[yyyy-WwwISO]'

// Single-character inline symbols: a backslash followed by one letter
const InlineRegexSymbol_CapitalLetter: string = '\\C'
const InlineRegexSymbol_LowercaseLetter: string = '\\l'
Expand All @@ -374,7 +382,10 @@ const sortingSymbolsArr: Array<string> = [
escapeRegexUnsafeCharacters(WordInASCIIRegexSymbol),
escapeRegexUnsafeCharacters(WordInAnyLanguageRegexSymbol),
escapeRegexUnsafeCharacters(Date_dd_Mmm_yyyy_RegexSymbol),
escapeRegexUnsafeCharacters(Date_Mmm_dd_yyyy_RegexSymbol)
escapeRegexUnsafeCharacters(Date_Mmm_dd_yyyy_RegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_Www_mm_dd_RegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_WwwISO_RegexSymbol),
escapeRegexUnsafeCharacters(Date_yyyy_Www_RegexSymbol),
]

// A single alternation built from all entries of sortingSymbolsArr;
// flags: 'g' - global matching, 'i' - case-insensitive matching
const sortingSymbolsRegex = new RegExp(sortingSymbolsArr.join('|'), 'gi')
Expand Down Expand Up @@ -444,6 +455,9 @@ export const CompoundDotNumberNormalizerFn: NormalizerFn = (s: string) => getNor
// NormalizerFn adapters: each one forwards the matched text to the corresponding
// normalizer imported from the matchers module.
export const CompoundDashNumberNormalizerFn: NormalizerFn = (text: string) => {
    return getNormalizedNumber(text, DASH_SEPARATOR)
}
export const Date_dd_Mmm_yyyy_NormalizerFn: NormalizerFn = (text: string) => {
    return getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(text)
}
export const Date_Mmm_dd_yyyy_NormalizerFn: NormalizerFn = (text: string) => {
    return getNormalizedDate_Mmm_dd_yyyy_NormalizerFn(text)
}
// Week-based dates
export const Date_yyyy_Www_mm_dd_NormalizerFn: NormalizerFn = (text: string) => {
    return getNormalizedDate_yyyy_Www_mm_dd_NormalizerFn(text)
}
export const Date_yyyy_WwwISO_NormalizerFn: NormalizerFn = (text: string) => {
    return getNormalizedDate_yyyy_WwwISO_NormalizerFn(text)
}
export const Date_yyyy_Www_NormalizerFn: NormalizerFn = (text: string) => {
    return getNormalizedDate_yyyy_Www_NormalizerFn(text)
}

export enum AdvancedRegexType {
None, // to allow if (advancedRegex)
Expand All @@ -456,7 +470,10 @@ export enum AdvancedRegexType {
WordInASCII,
WordInAnyLanguage,
Date_dd_Mmm_yyyy,
Date_Mmm_dd_yyyy
Date_Mmm_dd_yyyy,
Date_yyyy_Www_mm_dd_yyyy,
Date_yyyy_WwwISO,
Date_yyyy_Www
}

const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
Expand Down Expand Up @@ -510,6 +527,21 @@ const sortingSymbolToRegexpStr: { [key: string]: RegExpSpecStr } = {
regexpStr: Date_Mmm_dd_yyyy_RegexStr,
normalizerFn: Date_Mmm_dd_yyyy_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_Mmm_dd_yyyy
},
[Date_yyyy_Www_mm_dd_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_Www_mm_dd_RegexStr,
normalizerFn: Date_yyyy_Www_mm_dd_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_Www_mm_dd_yyyy
},
[Date_yyyy_WwwISO_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_Www_RegexStr,
normalizerFn: Date_yyyy_WwwISO_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_WwwISO
},
[Date_yyyy_Www_RegexSymbol]: { // Intentionally retain character case
regexpStr: Date_yyyy_Www_RegexStr,
normalizerFn: Date_yyyy_Www_NormalizerFn,
advancedRegexType: AdvancedRegexType.Date_yyyy_Www
}
}

Expand Down
114 changes: 113 additions & 1 deletion src/test/int/dates-in-names.int.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {
TAbstractFile,
TAbstractFile, TFile,
TFolder,
Vault
} from "obsidian";
Expand All @@ -21,6 +21,8 @@ import {
mockTFolderWithDateNamedChildren,
TIMESTAMP_DEEP_NEWEST,
TIMESTAMP_DEEP_OLDEST,
mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest,
mockTFolderWithDateWeekNamedChildren, mockTFile, mockTFolder,
} from "../mocks";
import {
SortingSpecProcessor
Expand Down Expand Up @@ -58,6 +60,116 @@ describe('sortFolderItems', () => {
'AAA Jan-01-2012'
])
})
it('should correctly handle yyyy-Www (mm-dd) pattern in file names', () => {
    // Arrange: parse a spec using the \[yyyy-Www (mm-dd)] symbol and prepare the mocked folder
    const folderPath = 'parent/folder/path'
    const specText = ` ... \\[yyyy-Www (mm-dd)]
< a-z
------
`
    const specs = new SortingSpecProcessor().parseSortSpecFromText(
        specText.split('\n'),
        folderPath,
        'file name with the sorting, irrelevant here'
    )
    const spec: CustomSortSpec = specs?.sortSpecByPath![folderPath]!
    const parent: TFolder = mockTFolderWithDateWeekNamedChildren(folderPath)
    const ctx: ProcessingContext = {}

    // Act
    const sorted: Array<TAbstractFile> = sortFolderItems(parent, parent.children, spec, ctx, OS_alphabetical)

    // Assert: items ordered by the (mm-dd) date, oldest first; the item without a date comes last
    expect(sorted.map(item => item.name)).toEqual([
        "GHI 2021-W1 (01-04)",
        "DEF 2021-W9 (03-01).md",
        "ABC 2021-W13 (03-29)",
        "MNO 2021-W45 (11-08).md",
        "JKL 2021-W52 (12-27).md",
        "------.md"
    ])
})
it('should correctly handle yyyy-WwwISO pattern in file names', () => {
    // Arrange: a spec combining the week+date symbol with the ISO week-only symbol
    const folderPath = 'parent/folder/path'
    const specText = ` /+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-WwwISO]
< a-z
`
    const specs = new SortingSpecProcessor().parseSortSpecFromText(
        specText.split('\n'),
        folderPath,
        'file name with the sorting, irrelevant here'
    )
    const spec: CustomSortSpec = specs?.sortSpecByPath![folderPath]!
    const parent: TFolder = mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest(folderPath)
    const ctx: ProcessingContext = {}

    // Act
    const sorted: Array<TAbstractFile> = sortFolderItems(parent, parent.children, spec, ctx, OS_alphabetical)

    // Assert: oldest first, with week-only names resolved to the dates marked 'ISO:' in the names
    // (ISO standard of weeks numbering); the item without a date comes last
    expect(sorted.map(item => item.name)).toEqual([
        'E 2021-W1 (01-01)',
        'F ISO:2021-01-04 US:2020-12-28 2021-W1',
        'A 2021-W10 (03-05).md',
        'B ISO:2021-03-08 US:2021-03-01 2021-W10',
        'C 2021-W51 (12-17).md',
        'D ISO:2021-12-20 US:2021-12-13 2021-W51.md',
        'FFF2 ISO:2021-12-27 US:2021-12-20 2021-W52.md',
        'FFF1 ISO:2022-01-03 US:2021-12-27 2021-W53.md',
        "------.md"
    ])
})
it('should correctly handle yyyy-Www pattern in file names', () => {
    // Arrange: a spec combining the week+date symbol with the plain week-only symbol, reversed order
    const folderPath = 'parent/folder/path'
    const specText = ` /+ ... \\[yyyy-Www (mm-dd)]
/+ ... \\[yyyy-Www]
> a-z
`
    const specs = new SortingSpecProcessor().parseSortSpecFromText(
        specText.split('\n'),
        folderPath,
        'file name with the sorting, irrelevant here'
    )
    const spec: CustomSortSpec = specs?.sortSpecByPath![folderPath]!
    const parent: TFolder = mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest(folderPath)
    const ctx: ProcessingContext = {}

    // Act
    const sorted: Array<TAbstractFile> = sortFolderItems(parent, parent.children, spec, ctx, OS_alphabetical)

    // Assert: newest first, with week-only names resolved to the dates marked 'US:' in the names
    // (U.S. standard of weeks numbering); the item without a date comes last
    expect(sorted.map(item => item.name)).toEqual([
        'FFF1 ISO:2022-01-03 US:2021-12-27 2021-W53.md',
        'FFF2 ISO:2021-12-27 US:2021-12-20 2021-W52.md',
        'C 2021-W51 (12-17).md',
        'D ISO:2021-12-20 US:2021-12-13 2021-W51.md',
        'A 2021-W10 (03-05).md',
        'B ISO:2021-03-08 US:2021-03-01 2021-W10',
        'E 2021-W1 (01-01)',
        'F ISO:2021-01-04 US:2020-12-28 2021-W1',
        "------.md"
    ])
})
})


Expand Down
30 changes: 30 additions & 0 deletions src/test/mocks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,33 @@ export const mockTFolderWithDateNamedChildren = (name: string): TFolder => {

return mockTFolder(name, [child1, child2, child3, child4])
}

/**
 * Creates a mocked folder named `name` whose children (files and folders) carry
 * `yyyy-Www (mm-dd)` dates in their names, plus one file without any date.
 */
export const mockTFolderWithDateWeekNamedChildren = (name: string): TFolder => {
    // The week numbers in the names follow the ISO numbering
    return mockTFolder(name, [
        mockTFile('------', 'md'),
        mockTFolder('ABC 2021-W13 (03-29)'),
        mockTFile('DEF 2021-W9 (03-01)', 'md'),
        mockTFolder('GHI 2021-W1 (01-04)'),
        mockTFile('JKL 2021-W52 (12-27)', 'md'),
        mockTFile('MNO 2021-W45 (11-08)', 'md'),
    ])
}

/**
 * Creates a mocked folder named `name` for exercising both the ISO and the U.S. week numbering.
 *
 * The year 2021 (1st Jan on Friday) is used because its ISO and U.S. week numbers differ.
 * The children are a mix of files and folders whose names match either the week-only or
 * the week+date syntax, so their relative order depends on the week numbering applied.
 * Week-only names embed the expected dates for both numberings ('ISO:' and 'US:').
 */
export const mockTFolderWithDateWeekNamedChildrenForISOvsUSweekNumberingTest = (name: string): TFolder => {
    return mockTFolder(name, [
        mockTFile('------', 'md'),
        // Friday date, not valid for the given (ISO) week number; the week number gets ignored
        mockTFile('A 2021-W10 (03-05)', 'md'),
        mockTFolder('B ISO:2021-03-08 US:2021-03-01 2021-W10'),
        // Friday date, not valid for the given (ISO) week number; the week number gets ignored
        mockTFile('C 2021-W51 (12-17)', 'md'),
        mockTFile('D ISO:2021-12-20 US:2021-12-13 2021-W51', 'md'),
        // Friday date, not valid for the given (ISO) week number; the week number gets ignored
        mockTFolder('E 2021-W1 (01-01)'),
        mockTFolder('F ISO:2021-01-04 US:2020-12-28 2021-W1'),
        mockTFile('FFF2 ISO:2021-12-27 US:2021-12-20 2021-W52', 'md'),
        // Invalid week number in ISO numbering, expected to fall into the next year
        mockTFile('FFF1 ISO:2022-01-03 US:2021-12-27 2021-W53', 'md'),
    ])
}
Loading

0 comments on commit 975f6ee

Please sign in to comment.