diff --git a/CHANGELOG.md b/CHANGELOG.md index 999ac4bb..72440274 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,16 @@ All changes that impact users of this module are documented in this file, in the - Add GitLab functionalities +## 2.5.0 - 2024-10-29 + +_Full changeset and discussions: [#1115](https://github.com/OpenTermsArchive/engine/pull/1115)._ + +> Development of this release was supported by the [French Ministry for Foreign Affairs](https://www.diplomatie.gouv.fr/fr/politique-etrangere-de-la-france/diplomatie-numerique/) through its ministerial [State Startups incubator](https://beta.gouv.fr/startups/open-terms-archive.html) under the aegis of the Ambassador for Digital Affairs. + +### Added + +- Add script to remove duplicate issues in GitHub reports + ## 2.4.0 - 2024-10-24 _Full changeset and discussions: [#1114](https://github.com/OpenTermsArchive/engine/pull/1114)._ diff --git a/package-lock.json b/package-lock.json index 99fd83b6..37184856 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@opentermsarchive/engine", - "version": "2.4.0", + "version": "2.5.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@opentermsarchive/engine", - "version": "2.4.0", + "version": "2.5.0", "license": "EUPL-1.2", "dependencies": { "@accordproject/markdown-cicero": "^0.15.2", diff --git a/package.json b/package.json index b8b878a9..70a49792 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@opentermsarchive/engine", - "version": "2.4.0", + "version": "2.5.0", "description": "Tracks and makes visible changes to the terms of online services", "homepage": "https://opentermsarchive.org", "bugs": { diff --git a/scripts/reporter/duplicate/README.md b/scripts/reporter/duplicate/README.md new file mode 100644 index 00000000..f0808d63 --- /dev/null +++ b/scripts/reporter/duplicate/README.md @@ -0,0 +1,37 @@ +# Duplicate issues removal script + +This script helps remove duplicate issues from a 
/**
 * Splits a `<owner>/<repo>` configuration value into its two components.
 *
 * @param {string} repository - Value of the "reporter.githubIssues.repositories.declarations" configuration entry.
 * @returns {{ owner: string, repo: string }} The owner and the repository name.
 * @throws {Error} If the value contains no slash, contains "https://", or has an empty owner or repository name.
 */
function parseRepository(repository) {
  const [ owner, repo ] = repository.split('/');

  // The emptiness checks reject values such as "owner/" or "/repo", which contain a slash but cannot address a repository.
  if (!repository.includes('/') || repository.includes('https://') || !owner || !repo) {
    throw new Error(`Configuration entry "reporter.githubIssues.repositories.declarations" is expected to be a string in the format <owner>/<repo>, but received: "${repository}"`);
  }

  return { owner, repo };
}

/**
 * Groups issues by their exact title.
 *
 * @param {Array<object>} issues - Issues exposing a `title` property.
 * @returns {Map<string, Array<object>>} Issues sharing a title, keyed by that title, in input order.
 */
function groupIssuesByTitle(issues) {
  const issuesByTitle = new Map();

  for (const issue of issues) {
    const sameTitleIssues = issuesByTitle.get(issue.title);

    if (sameTitleIssues) {
      sameTitleIssues.push(issue);
    } else {
      issuesByTitle.set(issue.title, [issue]);
    }
  }

  return issuesByTitle;
}

/**
 * Returns the issue with the earliest `created_at` date; on equal dates, the first one encountered is kept.
 *
 * @param {Array<object>} issues - Non-empty list of issues exposing a `created_at` property.
 * @returns {object} The oldest issue of the list.
 */
function findOldestIssue(issues) {
  return issues.reduce((oldest, current) => (new Date(current.created_at) < new Date(oldest.created_at) ? current : oldest));
}

/**
 * Closes every open issue of the configured declarations repository that has the same title as an older open issue.
 *
 * The repository is read from the "@opentermsarchive/engine.reporter.githubIssues.repositories.declarations"
 * configuration entry and the GitHub token from the `OTA_ENGINE_GITHUB_TOKEN` environment variable.
 * For each group of open issues sharing a title, the oldest one is kept open; every other one receives
 * an explanatory comment and is then closed. Progress is reported on the standard output.
 *
 * @returns {Promise<void>} Resolves once all duplicates have been processed.
 * @throws {Error} If the repository configuration entry is malformed, if the token is not set, or if a GitHub request fails.
 */
async function removeDuplicateIssues() {
  const repository = config.get('@opentermsarchive/engine.reporter.githubIssues.repositories.declarations');
  const { owner, repo } = parseRepository(repository);

  const token = process.env.OTA_ENGINE_GITHUB_TOKEN;

  // Fail before any request is sent rather than on the first write attempt, after the whole listing has been fetched.
  if (!token) {
    throw new Error('Environment variable "OTA_ENGINE_GITHUB_TOKEN" is required to close duplicate issues, but it is not set');
  }

  const octokit = new Octokit({ auth: token });

  console.log(`Getting issues from repository ${repository}…`);

  const issues = await octokit.paginate('GET /repos/{owner}/{repo}/issues', {
    owner,
    repo,
    state: 'open',
    per_page: 100,
  });

  // Entries carrying a `pull_request` property are pull requests, not issues, and must be left untouched.
  const onlyIssues = issues.filter(issue => !issue.pull_request);
  let counter = 0;

  console.log(`Found ${onlyIssues.length} issues`);

  for (const [ title, sameTitleIssues ] of groupIssuesByTitle(onlyIssues)) {
    if (sameTitleIssues.length === 1) {
      continue;
    }

    const originalIssue = findOldestIssue(sameTitleIssues);

    console.log(`\nFound ${sameTitleIssues.length - 1} duplicates for issue #${originalIssue.number} "${title}"`);

    for (const issue of sameTitleIssues) {
      if (issue.number === originalIssue.number) {
        continue;
      }

      // Comment first, close second: if the run is interrupted between the two requests, the issue is still open
      // and will be picked up again by the next run, instead of remaining closed without any explanation.
      await octokit.request('POST /repos/{owner}/{repo}/issues/{issue_number}/comments', { /* eslint-disable-line no-await-in-loop */
        owner,
        repo,
        issue_number: issue.number,
        body: `This issue is detected as duplicate as it has the same title as #${originalIssue.number}. It most likely was created accidentally by an engine older than [v2.3.2](https://github.com/OpenTermsArchive/engine/releases/tag/v2.3.2). Closing automatically.`,
      });

      await octokit.request('PATCH /repos/{owner}/{repo}/issues/{issue_number}', { /* eslint-disable-line no-await-in-loop */
        owner,
        repo,
        issue_number: issue.number,
        state: 'closed',
      });

      counter++;
      console.log(`Closed issue #${issue.number}: ${issue.html_url}`);
    }
  }

  console.log(`\nDuplicate removal process completed; ${counter} issues closed`);
}

// Handle rejections explicitly so that a failure is reported and reflected in the exit code.
removeDuplicateIssues().catch(error => {
  console.error('Duplicate removal process failed:', error);
  process.exitCode = 1;
});