diff --git a/bin/lyricsContentHashValidator.ts b/bin/lyricsContentHashValidator.ts index 817df7f16..102aecfed 100644 --- a/bin/lyricsContentHashValidator.ts +++ b/bin/lyricsContentHashValidator.ts @@ -2,7 +2,6 @@ import path from 'path'; import * as process from 'process'; import { filter, includes, isEmpty } from 'lodash-es'; import dotenv from 'dotenv'; -import recursive from 'recursive-readdir'; import chalk from 'chalk'; import assert from 'node:assert'; import fs from 'fs'; @@ -12,31 +11,29 @@ import { getSongInSectionTuples, logFileWithLinkInConsole, logProcessingFile, + readTxtFilesRecursively, SongMeta, - TXT_EXTENSION, } from '../src/index.js'; dotenv.config(); const runValidationForDir = async (dir: string) => { const duplicateHashes = filter( - (await recursive(dir)) - .filter((filePath) => filePath.endsWith(TXT_EXTENSION)) - .map((filePath) => { - const fileName = path.basename(filePath); - const fileContent = fs.readFileSync(filePath).toString(); - logProcessingFile(fileName, 'content hash validation'); - logFileWithLinkInConsole(filePath); - - const maybeTitle = getSongInSectionTuples(fileContent)[1]; - - assert.ok( - maybeTitle.includes(SongMeta.CONTENT_HASH), - `The ${SongMeta.CONTENT_HASH} should be defined.`, - ); - - return getMetaSectionsFromTitle(maybeTitle)[SongMeta.CONTENT_HASH]; - }), + (await readTxtFilesRecursively(dir)).map((filePath) => { + const fileName = path.basename(filePath); + const fileContent = fs.readFileSync(filePath).toString(); + logProcessingFile(fileName, 'content hash validation'); + logFileWithLinkInConsole(filePath); + + const maybeTitle = getSongInSectionTuples(fileContent)[1]; + + assert.ok( + maybeTitle.includes(SongMeta.CONTENT_HASH), + `The ${SongMeta.CONTENT_HASH} should be defined.`, + ); + + return getMetaSectionsFromTitle(maybeTitle)[SongMeta.CONTENT_HASH]; + }), (current, index, iteratee) => includes(iteratee, current, index + 1), ); diff --git a/bin/lyricsFileNameReprocessorRunner.ts b/bin/lyricsFileNameReprocessorRunner.ts index 8b7b4f614..095dd306d 100644 --- a/bin/lyricsFileNameReprocessorRunner.ts +++ b/bin/lyricsFileNameReprocessorRunner.ts @@ -2,7 +2,6 @@ import fs from 'fs-extra'; import path from 'path'; import * as process from 'process'; import dotenv from 'dotenv'; -import recursive from 'recursive-readdir'; import { isEqual } from 'lodash-es'; import chalk from 'chalk'; import { @@ -10,7 +9,7 @@ import { logFileWithLinkInConsole, logProcessingFile, lyricsFileNameReprocessor, - TXT_EXTENSION, + readTxtFilesRecursively, } from '../src/index.js'; dotenv.config(); @@ -18,37 +17,35 @@ dotenv.config(); const run = async (dir: string) => { console.log(`"Reprocessing file names from ${dir} directory.."`); - (await recursive(dir)) - .filter((filePath) => path.extname(filePath) === TXT_EXTENSION) - .forEach((filePath) => { - const existingContent = fs.readFileSync(filePath).toString(); - const fileName = path.basename(filePath); - logProcessingFile(fileName, 'file name'); - logFileWithLinkInConsole(filePath); - - const newFileName = lyricsFileNameReprocessor.deriveFromTitle( - getRawTitleBySong(existingContent), - ); - const hasNoChange = isEqual(fileName, newFileName); - - if (hasNoChange) { - console.log(chalk.yellow(`Skipped the ${fileName} file.`)); - console.log(); - console.groupEnd(); - - return; - } - - fs.unlinkSync(filePath); - fs.writeFileSync( - path.join(path.dirname(filePath), newFileName), - existingContent, - ); - - console.log(chalk.green(`Renamed to "${newFileName}"`)); + (await readTxtFilesRecursively(dir)).forEach((filePath) => { + const existingContent = fs.readFileSync(filePath).toString(); + const fileName = path.basename(filePath); + logProcessingFile(fileName, 'file name'); + logFileWithLinkInConsole(filePath); + + const newFileName = lyricsFileNameReprocessor.deriveFromTitle( + getRawTitleBySong(existingContent), + ); + const hasNoChange = isEqual(fileName, newFileName); + + if (hasNoChange) { + console.log(chalk.yellow(`Skipped the ${fileName} file.`)); console.log(); console.groupEnd(); - }); + + return; + } + + fs.unlinkSync(filePath); + fs.writeFileSync( + path.join(path.dirname(filePath), newFileName), + existingContent, + ); + + console.log(chalk.green(`Renamed to "${newFileName}"`)); + console.log(); + console.groupEnd(); + }); }; await run(process.env.CANDIDATES_DIR); diff --git a/bin/lyricsRomanianDictionaryAnalyzer.ts b/bin/lyricsRomanianDictionaryAnalyzer.ts index 8181f202e..17ebcc99f 100644 --- a/bin/lyricsRomanianDictionaryAnalyzer.ts +++ b/bin/lyricsRomanianDictionaryAnalyzer.ts @@ -8,7 +8,6 @@ import path from 'path'; import * as process from 'process'; import * as util from 'util'; import { parseArgs } from 'node:util'; -import recursive from 'recursive-readdir'; import { first, flatten, @@ -30,6 +29,7 @@ import { getTitleByRawSection, NEW_LINE, NEW_LINE_TUPLE, + readTxtFilesRecursively, SongSection, TEST_FILE, TXT_EXTENSION, @@ -44,7 +44,7 @@ dotenv.config(); const analyzeAndGet = async (dir: string, speller: NSpell) => { const incorrectWords = [] as string[]; - (await recursive(dir)) + (await readTxtFilesRecursively(dir)) .filter((file) => !includes(file, TEST_FILE)) .filter((filePath) => path.extname(filePath) === TXT_EXTENSION) .forEach((filePath) => { diff --git a/bin/lyricsSimilarityValidator.ts b/bin/lyricsSimilarityValidator.ts index 13fa70266..32ae77c42 100644 --- a/bin/lyricsSimilarityValidator.ts +++ b/bin/lyricsSimilarityValidator.ts @@ -10,12 +10,13 @@ import { isEmpty, isEqual, negate } from 'lodash-es'; import fsExtra from 'fs-extra'; import dotenv from 'dotenv'; import stringSimilarity from 'string-similarity'; -import recursive from 'recursive-readdir'; import chalk from 'chalk'; import { ALT_SONGS_FILE_SUFFIX, - NEW_LINE, logFileWithLinkInConsole, + NEW_LINE, + parse, + readTxtFilesRecursively, } from '../src/index.js'; dotenv.config(); @@ -23,7 +24,7 @@ dotenv.config(); const THRESHOLD = 0.65; const readAllFilesAgainstTheChecksAreDoneOnce = async (againstDir: string) => - (await recursive(againstDir)).map((filePath) => { + (await readTxtFilesRecursively(againstDir)).map((filePath) => { return { contentAsString: fs.readFileSync(filePath).toString(), fileName: path.basename(filePath), @@ -31,6 +32,20 @@ const readAllFilesAgainstTheChecksAreDoneOnce = async (againstDir: string) => }; }); +const getRelevantContentOnly = (contentAsString: string) => { + const { sectionOrder, sectionsMap } = parse(contentAsString, { + ignoreUniquenessErrors: true, + }); + + return sectionOrder + .map( + (verseSongSectionIdentifier) => + sectionsMap[verseSongSectionIdentifier].content, + ) + .join(NEW_LINE) + .toLowerCase(); +}; + const computeSimilarity = (candidateFilePath: string) => ({ @@ -42,10 +57,9 @@ const computeSimilarity = fileName: string; filePath: string; }) => { - const candidateContent = fs.readFileSync(candidateFilePath).toString(); const similarity = stringSimilarity.compareTwoStrings( - contentAsString.toLowerCase(), - candidateContent.toLowerCase(), + getRelevantContentOnly(contentAsString), + getRelevantContentOnly(fs.readFileSync(candidateFilePath).toString()), ); return { @@ -62,7 +76,7 @@ const findSimilarities = async ( const againstSongs = await readAllFilesAgainstTheChecksAreDoneOnce(againstDir); - return (await recursive(potentialDuplicatesDir)) + return (await readTxtFilesRecursively(potentialDuplicatesDir)) .map((candidateFilePath) => { const candidateFileName = path.basename(candidateFilePath); diff --git a/bin/lyricsTextReprocessorRunner.ts b/bin/lyricsTextReprocessorRunner.ts index 9c05c73c9..c5a90bb10 100644 --- a/bin/lyricsTextReprocessorRunner.ts +++ b/bin/lyricsTextReprocessorRunner.ts @@ -7,13 +7,12 @@ import path from 'path'; import * as process from 'process'; import dotenv from 'dotenv'; import { flow } from 'lodash-es'; -import recursive from 'recursive-readdir'; import { contentReplacerReprocessor, contentStructureReprocessor, logFileWithLinkInConsole, logProcessingFile, - TXT_EXTENSION, + readTxtFilesRecursively, } from '../src/index.js'; dotenv.config(); @@ -21,22 +20,20 @@ dotenv.config(); const run = async (dir: string) => { console.log(`"Reprocessing file contents from ${dir} directory.."`); - (await recursive(dir)) - .filter((filePath) => path.extname(filePath) === TXT_EXTENSION) - .forEach((filePath) => { - const songContent = fs.readFileSync(filePath).toString(); - const fileName = path.basename(filePath); - logProcessingFile(fileName, 'file contents'); - logFileWithLinkInConsole(filePath); + (await readTxtFilesRecursively(dir)).forEach((filePath) => { + const songContent = fs.readFileSync(filePath).toString(); + const fileName = path.basename(filePath); + logProcessingFile(fileName, 'file contents'); + logFileWithLinkInConsole(filePath); - fs.writeFileSync( - path.join(path.dirname(filePath), fileName), - flow([ - contentReplacerReprocessor.reprocess, - contentStructureReprocessor.reprocess, - ])(songContent), - ); - }); + fs.writeFileSync( + path.join(path.dirname(filePath), fileName), + flow([ + contentReplacerReprocessor.reprocess, + contentStructureReprocessor.reprocess, + ])(songContent), + ); + }); }; await run(process.env.CANDIDATES_DIR); diff --git a/bin/lyricsTextValidator.ts b/bin/lyricsTextValidator.ts index b76a3d88d..464b9247b 100644 --- a/bin/lyricsTextValidator.ts +++ b/bin/lyricsTextValidator.ts @@ -3,30 +3,29 @@ import path from 'path'; import * as process from 'process'; import { flattenDeep, isEmpty, negate, uniq } from 'lodash-es'; import dotenv from 'dotenv'; -import recursive from 'recursive-readdir'; import chalk from 'chalk'; import { assemblyCharsStats, ERROR_CODE, logFileWithLinkInConsole, logProcessingFile, - TXT_EXTENSION, + readTxtFilesRecursively, verifyStructure, } from '../src/index.js'; dotenv.config(); const runValidationForDir = async (dir: string) => { - const arrayOfFileNameAndContent = (await recursive(dir)) - .filter((filePath) => filePath.endsWith(TXT_EXTENSION)) - .map((filePath) => { + const arrayOfFileNameAndContent = (await readTxtFilesRecursively(dir)).map( + (filePath) => { const fileName = path.basename(filePath); const fileContent = fs.readFileSync(filePath).toString(); logProcessingFile(fileName, 'content validation'); logFileWithLinkInConsole(filePath); return { filePath, fileName, fileContent }; - }); + }, + ); // --- // Chars problems diff --git a/candidates/Ekklesia Arad/Ekklesia Arad - Biruitor.txt b/candidates/Ekklesia/Ekklesia Arad - Biruitor.txt similarity index 100% rename from candidates/Ekklesia Arad/Ekklesia Arad - Biruitor.txt rename to candidates/Ekklesia/Ekklesia Arad - Biruitor.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Aleg sa cred.txt b/candidates/Grup Eldad/Grupul Eldad - Aleg sa cred.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Aleg sa cred.txt rename to candidates/Grup Eldad/Grupul Eldad - Aleg sa cred.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Cand domnul porunceste.txt b/candidates/Grup Eldad/Grupul Eldad - Cand domnul porunceste.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Cand domnul porunceste.txt rename to candidates/Grup Eldad/Grupul Eldad - Cand domnul porunceste.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Cine e oare cel mai sfant sub soare.txt b/candidates/Grup Eldad/Grupul Eldad - Cine e oare cel mai sfant sub soare.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Cine e oare cel mai sfant sub soare.txt rename to candidates/Grup Eldad/Grupul Eldad - Cine e oare cel mai sfant sub soare.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Fara dumnezeu.txt b/candidates/Grup Eldad/Grupul Eldad - Fara dumnezeu.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Fara dumnezeu.txt rename to candidates/Grup Eldad/Grupul Eldad - Fara dumnezeu.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Fiti tari.txt b/candidates/Grup Eldad/Grupul Eldad - Fiti tari.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Fiti tari.txt rename to candidates/Grup Eldad/Grupul Eldad - Fiti tari.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Imparte-ti painea.txt b/candidates/Grup Eldad/Grupul Eldad - Imparte-ti painea.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Imparte-ti painea.txt rename to candidates/Grup Eldad/Grupul Eldad - Imparte-ti painea.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Luptam.txt b/candidates/Grup Eldad/Grupul Eldad - Luptam.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Luptam.txt rename to candidates/Grup Eldad/Grupul Eldad - Luptam.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Pacea lui.txt b/candidates/Grup Eldad/Grupul Eldad - Pacea lui.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Pacea lui.txt rename to candidates/Grup Eldad/Grupul Eldad - Pacea lui.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Plecam astazi la lupta.txt b/candidates/Grup Eldad/Grupul Eldad - Plecam astazi la lupta.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Plecam astazi la lupta.txt rename to candidates/Grup Eldad/Grupul Eldad - Plecam astazi la lupta.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Sa fiu neprihanit- (placute-ti sunt o doamne faptele curate ).txt b/candidates/Grup Eldad/Grupul Eldad - Sa fiu neprihanit- (placute-ti sunt o doamne faptele curate ).txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Sa fiu neprihanit- (placute-ti sunt o doamne faptele curate ).txt rename to candidates/Grup Eldad/Grupul Eldad - Sa fiu neprihanit- (placute-ti sunt o doamne faptele curate ).txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Se clatina lumea.txt b/candidates/Grup Eldad/Grupul Eldad - Se clatina lumea.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Se clatina lumea.txt rename to candidates/Grup Eldad/Grupul Eldad - Se clatina lumea.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Te asteptam sa vii din nou isuse.txt b/candidates/Grup Eldad/Grupul Eldad - Te asteptam sa vii din nou isuse.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Te asteptam sa vii din nou isuse.txt rename to candidates/Grup Eldad/Grupul Eldad - Te asteptam sa vii din nou isuse.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Tie iti cantam.txt b/candidates/Grup Eldad/Grupul Eldad - Tie iti cantam.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Tie iti cantam.txt rename to candidates/Grup Eldad/Grupul Eldad - Tie iti cantam.txt diff --git a/candidates/Grupul Eldad/Grupul Eldad - Vas de lut.txt b/candidates/Grup Eldad/Grupul Eldad - Vas de lut.txt similarity index 100% rename from candidates/Grupul Eldad/Grupul Eldad - Vas de lut.txt rename to candidates/Grup Eldad/Grupul Eldad - Vas de lut.txt diff --git a/src/core.ts b/src/core.ts index d2defcd7b..6751e0685 100644 --- a/src/core.ts +++ b/src/core.ts @@ -13,6 +13,9 @@ import { import * as crypto from 'crypto'; import chalk from 'chalk'; import short from 'short-uuid'; +import assert from 'node:assert'; +import recursive from 'recursive-readdir'; +import path from 'path'; import { SequenceChar, SongMeta, SongSection } from './types.js'; import { COLON, @@ -23,8 +26,8 @@ import { NEW_LINE_TUPLE, SEMICOLON, TEST_ENV, + TXT_EXTENSION, } from './constants.js'; -import assert from 'node:assert'; const MISSING_SEQUENCE_NUMBER = 1; @@ -192,3 +195,8 @@ export const getMetaSectionsFromTitle = (titleContent: string) => { export const multiToSingle = (text: string) => text?.split(SEMICOLON)?.map(trim).join(`${SEMICOLON}${EMPTY_SPACE}`); + +export const readTxtFilesRecursively = async (dir: string) => + (await recursive(dir)).filter((filePath) => + isEqual(TXT_EXTENSION, path.extname(filePath)), + ); diff --git a/temp-runners/importSongsFromRcByAuthors.ts b/temp-runners/importSongsFromRcByAuthors.ts index 2a455b55f..3581ce8d3 100644 --- a/temp-runners/importSongsFromRcByAuthors.ts +++ b/temp-runners/importSongsFromRcByAuthors.ts @@ -3,7 +3,6 @@ import path from 'path'; import * as process from 'process'; import fsExtra from 'fs-extra'; import dotenv from 'dotenv'; -import recursive from 'recursive-readdir'; import pMap from 'p-map'; import { flatten } from 'lodash-es'; import { fileURLToPath } from 'url'; @@ -14,6 +13,7 @@ import { NEW_LINE, parse, print, + readTxtFilesRecursively, } from '../src/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -35,7 +35,7 @@ const rcAuthorPathsToProcess = fsExtra .filter(Boolean); const readFiles = async (dir: string) => - (await recursive(dir)).map((filePath) => { + (await readTxtFilesRecursively(dir)).map((filePath) => { return { contentAsString: fs.readFileSync(filePath).toString(), fileName: path.basename(filePath), diff --git a/temp-runners/importSongsFromRcByIds.ts b/temp-runners/importSongsFromRcByIds.ts index 39d676ef2..eb8d8db86 100644 --- a/temp-runners/importSongsFromRcByIds.ts +++ b/temp-runners/importSongsFromRcByIds.ts @@ -3,7 +3,6 @@ import path from 'path'; import * as process from 'process'; import fsExtra from 'fs-extra'; import dotenv from 'dotenv'; -import recursive from 'recursive-readdir'; import pMap from 'p-map'; import { first, flatten } from 'lodash-es'; import { fileURLToPath } from 'url'; @@ -14,6 +13,7 @@ import { NEW_LINE, parse, print, + readTxtFilesRecursively, } from '../src/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -38,7 +38,7 @@ const RC_INDEX = JSON.parse( ); const readFiles = async (dir: string) => - (await recursive(dir)).map((filePath) => { + (await readTxtFilesRecursively(dir)).map((filePath) => { return { contentAsString: fs.readFileSync(filePath).toString(), fileName: path.basename(filePath), diff --git a/temp-runners/moveExistingWorshipSongsToDir.ts b/temp-runners/moveExistingWorshipSongsToDir.ts index ae7ce9cc7..4e7ce7a27 100644 --- a/temp-runners/moveExistingWorshipSongsToDir.ts +++ b/temp-runners/moveExistingWorshipSongsToDir.ts @@ -3,14 +3,16 @@ import path from 'path'; import * as process from 'process'; import fsExtra from 'fs-extra'; import dotenv from 'dotenv'; -import recursive from 'recursive-readdir'; import pMap from 'p-map'; import stringSimilarity from 'string-similarity'; import { flatten, without } from 'lodash-es'; -import { parse } from '../src/songParser.js'; -import { NEW_LINE, SLASH } from '../src/index.js'; - import { fileURLToPath } from 'url'; +import { + NEW_LINE, + parse, + readTxtFilesRecursively, + SLASH, +} from '../src/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -26,7 +28,7 @@ const pickedPartialTitleSongs = fsExtra .filter(Boolean); const readFiles = async (dir: string) => - (await recursive(dir)).map((filePath) => { + (await readTxtFilesRecursively(dir)).map((filePath) => { return { contentAsString: fs.readFileSync(filePath).toString(), fileName: path.basename(filePath),