Skip to content

Commit

Permalink
feat: Improve the existing stack (#186)
Browse files Browse the repository at this point in the history
#### Motivation and context

<!--- Why is this change required? -->

#### Checklist:

- [x] I only use the allowed chars
  • Loading branch information
ioanlucut authored Oct 2, 2023
2 parents c414f9e + c30995d commit 01406fd
Show file tree
Hide file tree
Showing 38 changed files with 152 additions and 541 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ jobs:
filters: |
rcImportsHaveChanged:
- 'temp-runners/rc_authors_to_process.txt'
- 'temp-runners/rc_ids_to_ignore.txt'
- 'temp-runners/rc_ids_to_process.txt'
Build:
Expand Down
3 changes: 0 additions & 3 deletions CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,3 @@
/candidates/* @EmanuelVecerdea
/LaTeX/* @EmanuelVecerdea
/LaTeX/* @EmanuelVecerdea
/temp_runners/manual_picks.txt @EmanuelVecerdea
/temp_runners/rc_authors_to_process.txt @EmanuelVecerdea
/temp_runners/rc_ids_to_process.txt @EmanuelVecerdea
35 changes: 16 additions & 19 deletions bin/lyricsContentHashValidator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import path from 'path';
import * as process from 'process';
import { filter, includes, isEmpty } from 'lodash-es';
import dotenv from 'dotenv';
import recursive from 'recursive-readdir';
import chalk from 'chalk';
import assert from 'node:assert';
import fs from 'fs';
Expand All @@ -12,31 +11,29 @@ import {
getSongInSectionTuples,
logFileWithLinkInConsole,
logProcessingFile,
readTxtFilesRecursively,
SongMeta,
TXT_EXTENSION,
} from '../src/index.js';

dotenv.config();

const runValidationForDir = async (dir: string) => {
const duplicateHashes = filter(
(await recursive(dir))
.filter((filePath) => filePath.endsWith(TXT_EXTENSION))
.map((filePath) => {
const fileName = path.basename(filePath);
const fileContent = fs.readFileSync(filePath).toString();
logProcessingFile(fileName, 'content hash validation');
logFileWithLinkInConsole(filePath);

const maybeTitle = getSongInSectionTuples(fileContent)[1];

assert.ok(
maybeTitle.includes(SongMeta.CONTENT_HASH),
`The ${SongMeta.CONTENT_HASH} should be defined.`,
);

return getMetaSectionsFromTitle(maybeTitle)[SongMeta.CONTENT_HASH];
}),
(await readTxtFilesRecursively(dir)).map((filePath) => {
const fileName = path.basename(filePath);
const fileContent = fs.readFileSync(filePath).toString();
logProcessingFile(fileName, 'content hash validation');
logFileWithLinkInConsole(filePath);

const maybeTitle = getSongInSectionTuples(fileContent)[1];

assert.ok(
maybeTitle.includes(SongMeta.CONTENT_HASH),
`The ${SongMeta.CONTENT_HASH} should be defined.`,
);

return getMetaSectionsFromTitle(maybeTitle)[SongMeta.CONTENT_HASH];
}),
(current, index, iteratee) => includes(iteratee, current, index + 1),
);

Expand Down
59 changes: 28 additions & 31 deletions bin/lyricsFileNameReprocessorRunner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,50 @@ import fs from 'fs-extra';
import path from 'path';
import * as process from 'process';
import dotenv from 'dotenv';
import recursive from 'recursive-readdir';
import { isEqual } from 'lodash-es';
import chalk from 'chalk';
import {
getRawTitleBySong,
logFileWithLinkInConsole,
logProcessingFile,
lyricsFileNameReprocessor,
TXT_EXTENSION,
readTxtFilesRecursively,
} from '../src/index.js';

dotenv.config();

const run = async (dir: string) => {
console.log(`"Reprocessing file names from ${dir} directory.."`);

(await recursive(dir))
.filter((filePath) => path.extname(filePath) === TXT_EXTENSION)
.forEach((filePath) => {
const existingContent = fs.readFileSync(filePath).toString();
const fileName = path.basename(filePath);
logProcessingFile(fileName, 'file name');
logFileWithLinkInConsole(filePath);

const newFileName = lyricsFileNameReprocessor.deriveFromTitle(
getRawTitleBySong(existingContent),
);
const hasNoChange = isEqual(fileName, newFileName);

if (hasNoChange) {
console.log(chalk.yellow(`Skipped the ${fileName} file.`));
console.log();
console.groupEnd();

return;
}

fs.unlinkSync(filePath);
fs.writeFileSync(
path.join(path.dirname(filePath), newFileName),
existingContent,
);

console.log(chalk.green(`Renamed to "${newFileName}"`));
(await readTxtFilesRecursively(dir)).forEach((filePath) => {
const existingContent = fs.readFileSync(filePath).toString();
const fileName = path.basename(filePath);
logProcessingFile(fileName, 'file name');
logFileWithLinkInConsole(filePath);

const newFileName = lyricsFileNameReprocessor.deriveFromTitle(
getRawTitleBySong(existingContent),
);
const hasNoChange = isEqual(fileName, newFileName);

if (hasNoChange) {
console.log(chalk.yellow(`Skipped the ${fileName} file.`));
console.log();
console.groupEnd();
});

return;
}

fs.unlinkSync(filePath);
fs.writeFileSync(
path.join(path.dirname(filePath), newFileName),
existingContent,
);

console.log(chalk.green(`Renamed to "${newFileName}"`));
console.log();
console.groupEnd();
});
};

await run(process.env.CANDIDATES_DIR);
Expand Down
4 changes: 2 additions & 2 deletions bin/lyricsRomanianDictionaryAnalyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import path from 'path';
import * as process from 'process';
import * as util from 'util';
import { parseArgs } from 'node:util';
import recursive from 'recursive-readdir';
import {
first,
flatten,
Expand All @@ -30,6 +29,7 @@ import {
getTitleByRawSection,
NEW_LINE,
NEW_LINE_TUPLE,
readTxtFilesRecursively,
SongSection,
TEST_FILE,
TXT_EXTENSION,
Expand All @@ -44,7 +44,7 @@ dotenv.config();
const analyzeAndGet = async (dir: string, speller: NSpell) => {
const incorrectWords = [] as string[];

(await recursive(dir))
(await readTxtFilesRecursively(dir))
.filter((file) => !includes(file, TEST_FILE))
.filter((filePath) => path.extname(filePath) === TXT_EXTENSION)
.forEach((filePath) => {
Expand Down
28 changes: 21 additions & 7 deletions bin/lyricsSimilarityValidator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,42 @@ import { isEmpty, isEqual, negate } from 'lodash-es';
import fsExtra from 'fs-extra';
import dotenv from 'dotenv';
import stringSimilarity from 'string-similarity';
import recursive from 'recursive-readdir';
import chalk from 'chalk';
import {
ALT_SONGS_FILE_SUFFIX,
NEW_LINE,
logFileWithLinkInConsole,
NEW_LINE,
parse,
readTxtFilesRecursively,
} from '../src/index.js';

dotenv.config();

const THRESHOLD = 0.65;

const readAllFilesAgainstTheChecksAreDoneOnce = async (againstDir: string) =>
(await recursive(againstDir)).map((filePath) => {
(await readTxtFilesRecursively(againstDir)).map((filePath) => {
return {
contentAsString: fs.readFileSync(filePath).toString(),
fileName: path.basename(filePath),
filePath,
};
});

/**
 * Reduces a raw song file to the text used for the similarity comparison.
 *
 * The song is parsed with `ignoreUniquenessErrors: true` (presumably so that
 * songs with repeated section identifiers still parse — confirm against
 * `parse`), then the `content` of every section is collected in the order
 * given by `sectionOrder`, joined with `NEW_LINE` and lower-cased.
 *
 * @param contentAsString - The raw text of a song file.
 * @returns The lower-cased section contents, joined in `sectionOrder` order.
 */
const getRelevantContentOnly = (contentAsString: string) => {
  const parsedSong = parse(contentAsString, { ignoreUniquenessErrors: true });

  // Walk the sections in playing order so repeated sections are repeated in
  // the output as many times as they appear in `sectionOrder`.
  const contentsInSongOrder = parsedSong.sectionOrder.map(
    (sectionIdentifier) => parsedSong.sectionsMap[sectionIdentifier].content,
  );

  return contentsInSongOrder.join(NEW_LINE).toLowerCase();
};

const computeSimilarity =
(candidateFilePath: string) =>
({
Expand All @@ -42,10 +57,9 @@ const computeSimilarity =
fileName: string;
filePath: string;
}) => {
const candidateContent = fs.readFileSync(candidateFilePath).toString();
const similarity = stringSimilarity.compareTwoStrings(
contentAsString.toLowerCase(),
candidateContent.toLowerCase(),
getRelevantContentOnly(contentAsString),
getRelevantContentOnly(fs.readFileSync(candidateFilePath).toString()),
);

return {
Expand All @@ -62,7 +76,7 @@ const findSimilarities = async (
const againstSongs =
await readAllFilesAgainstTheChecksAreDoneOnce(againstDir);

return (await recursive(potentialDuplicatesDir))
return (await readTxtFilesRecursively(potentialDuplicatesDir))
.map((candidateFilePath) => {
const candidateFileName = path.basename(candidateFilePath);

Expand Down
31 changes: 14 additions & 17 deletions bin/lyricsTextReprocessorRunner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,33 @@ import path from 'path';
import * as process from 'process';
import dotenv from 'dotenv';
import { flow } from 'lodash-es';
import recursive from 'recursive-readdir';
import {
contentReplacerReprocessor,
contentStructureReprocessor,
logFileWithLinkInConsole,
logProcessingFile,
TXT_EXTENSION,
readTxtFilesRecursively,
} from '../src/index.js';

dotenv.config();

const run = async (dir: string) => {
console.log(`"Reprocessing file contents from ${dir} directory.."`);

(await recursive(dir))
.filter((filePath) => path.extname(filePath) === TXT_EXTENSION)
.forEach((filePath) => {
const songContent = fs.readFileSync(filePath).toString();
const fileName = path.basename(filePath);
logProcessingFile(fileName, 'file contents');
logFileWithLinkInConsole(filePath);
(await readTxtFilesRecursively(dir)).forEach((filePath) => {
const songContent = fs.readFileSync(filePath).toString();
const fileName = path.basename(filePath);
logProcessingFile(fileName, 'file contents');
logFileWithLinkInConsole(filePath);

fs.writeFileSync(
path.join(path.dirname(filePath), fileName),
flow([
contentReplacerReprocessor.reprocess,
contentStructureReprocessor.reprocess,
])(songContent),
);
});
fs.writeFileSync(
path.join(path.dirname(filePath), fileName),
flow([
contentReplacerReprocessor.reprocess,
contentStructureReprocessor.reprocess,
])(songContent),
);
});
};

await run(process.env.CANDIDATES_DIR);
Expand Down
11 changes: 5 additions & 6 deletions bin/lyricsTextValidator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,29 @@ import path from 'path';
import * as process from 'process';
import { flattenDeep, isEmpty, negate, uniq } from 'lodash-es';
import dotenv from 'dotenv';
import recursive from 'recursive-readdir';
import chalk from 'chalk';
import {
assemblyCharsStats,
ERROR_CODE,
logFileWithLinkInConsole,
logProcessingFile,
TXT_EXTENSION,
readTxtFilesRecursively,
verifyStructure,
} from '../src/index.js';

dotenv.config();

const runValidationForDir = async (dir: string) => {
const arrayOfFileNameAndContent = (await recursive(dir))
.filter((filePath) => filePath.endsWith(TXT_EXTENSION))
.map((filePath) => {
const arrayOfFileNameAndContent = (await readTxtFilesRecursively(dir)).map(
(filePath) => {
const fileName = path.basename(filePath);
const fileContent = fs.readFileSync(filePath).toString();
logProcessingFile(fileName, 'content validation');
logFileWithLinkInConsole(filePath);

return { filePath, fileName, fileContent };
});
},
);

// ---
// Chars problems
Expand Down
65 changes: 0 additions & 65 deletions candidates/BBSO/BBSO - El e vrednic (medley).txt

This file was deleted.

Loading

0 comments on commit 01406fd

Please sign in to comment.