From 636c49b421ef655e742cfe9fbe73325a85a33339 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 29 Oct 2024 11:12:00 +0100 Subject: [PATCH 1/9] Add script to remove duplicate issues --- scripts/reporter/duplicate/README.md | 35 +++++++++++++ scripts/reporter/duplicate/index.js | 76 ++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 scripts/reporter/duplicate/README.md create mode 100644 scripts/reporter/duplicate/index.js diff --git a/scripts/reporter/duplicate/README.md b/scripts/reporter/duplicate/README.md new file mode 100644 index 000000000..a7b96fb79 --- /dev/null +++ b/scripts/reporter/duplicate/README.md @@ -0,0 +1,35 @@ +# Duplicate issues removal script + +This script helps remove duplicate issues from a GitHub repository by closing newer duplicate issues. + +## Prerequisites + +1. Set up environment variables: + - Create a `.env` file in the root directory + - Add the GitHub personal access token of the bot that manage issues on your collection with repo permissions: + ``` + OTA_ENGINE_GITHUB_TOKEN=your_github_token + ``` + +2. 
Configure the target repository in `config/development.json`: + ```json + { + "@opentermsarchive/engine": { + "reporter": { + "githubIssues": { + "repositories": { + "declarations": "owner/repository" + } + } + } + } + } + ``` + +## Usage + +Run the script using: + +``` +node scripts/reporter/duplicate/index.js +``` diff --git a/scripts/reporter/duplicate/index.js b/scripts/reporter/duplicate/index.js new file mode 100644 index 000000000..b7d0a2101 --- /dev/null +++ b/scripts/reporter/duplicate/index.js @@ -0,0 +1,76 @@ +import 'dotenv/config'; +import config from 'config'; +import { Octokit } from 'octokit'; + +async function removeDuplicateIssues() { + try { + const repository = config.get('@opentermsarchive/engine.reporter.githubIssues.repositories.declarations'); + const [ owner, repo ] = repository.split('/'); + + if (!repository) { + throw new Error('Repository configuration is not set'); + } + + const octokit = new Octokit({ auth: process.env.OTA_ENGINE_GITHUB_TOKEN }); + + console.log(`Getting issues from repository ${repository}…`); + + const issues = await octokit.paginate('GET /repos/{owner}/{repo}/issues', { + owner, + repo, + state: 'open', + per_page: 100, + }); + + const onlyIssues = issues.filter(issue => !issue.pull_request); + const issuesByTitle = new Map(); + let counter = 0; + + console.log(`Found ${onlyIssues.length} issues`); + + for (const issue of onlyIssues) { + if (!issuesByTitle.has(issue.title)) { + issuesByTitle.set(issue.title, issue); + continue; + } + + const existingIssue = issuesByTitle.get(issue.title); + + console.log(`Found duplicate for issue "${issue.title}"`); + + let issueToClose; + + if (new Date(issue.created_at) > new Date(existingIssue.created_at)) { + issueToClose = issue; + } else { + issueToClose = existingIssue; + issuesByTitle.set(issue.title, issue); + } + + await octokit.request('PATCH /repos/{owner}/{repo}/issues/{issue_number}', { /* eslint-disable-line no-await-in-loop */ + owner, + repo, + issue_number: 
issueToClose.number, + state: 'closed', + }); + + await octokit.request('POST /repos/{owner}/{repo}/issues/{issue_number}/comments', { /* eslint-disable-line no-await-in-loop */ + owner, + repo, + issue_number: issueToClose.number, + body: 'Closed duplicate issue.', + }); + + counter++; + console.log(`Closed issue #${issueToClose.number}: ${issueToClose.html_url}`); + } + + console.log(`Removed ${counter} issues`); + console.log('Duplicate removal process completed'); + } catch (error) { + console.log(`Failed to remove duplicate issues: ${error.stack}`); + process.exit(1); + } +} + +removeDuplicateIssues(); From 4899a4602ce257c1de15e8782195c2a191330d7c Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 29 Oct 2024 11:55:50 +0100 Subject: [PATCH 2/9] Close duplicates with link to original issue --- scripts/reporter/duplicate/index.js | 59 +++++++++++++++-------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/scripts/reporter/duplicate/index.js b/scripts/reporter/duplicate/index.js index b7d0a2101..7d3bde0a4 100644 --- a/scripts/reporter/duplicate/index.js +++ b/scripts/reporter/duplicate/index.js @@ -30,43 +30,44 @@ async function removeDuplicateIssues() { for (const issue of onlyIssues) { if (!issuesByTitle.has(issue.title)) { - issuesByTitle.set(issue.title, issue); - continue; + issuesByTitle.set(issue.title, [issue]); + } else { + issuesByTitle.get(issue.title).push(issue); } + } - const existingIssue = issuesByTitle.get(issue.title); + for (const [ title, duplicateIssues ] of issuesByTitle) { + if (duplicateIssues.length === 1) continue; - console.log(`Found duplicate for issue "${issue.title}"`); + const originalIssue = duplicateIssues.reduce((oldest, current) => (new Date(current.created_at) < new Date(oldest.created_at) ? 
current : oldest)); - let issueToClose; + console.log(`\nFound ${duplicateIssues.length - 1} duplicates for issue #${originalIssue.number} "${title}"`); - if (new Date(issue.created_at) > new Date(existingIssue.created_at)) { - issueToClose = issue; - } else { - issueToClose = existingIssue; - issuesByTitle.set(issue.title, issue); - } + for (const issue of duplicateIssues) { + if (issue.number === originalIssue.number) { + continue; + } - await octokit.request('PATCH /repos/{owner}/{repo}/issues/{issue_number}', { /* eslint-disable-line no-await-in-loop */ - owner, - repo, - issue_number: issueToClose.number, - state: 'closed', - }); - - await octokit.request('POST /repos/{owner}/{repo}/issues/{issue_number}/comments', { /* eslint-disable-line no-await-in-loop */ - owner, - repo, - issue_number: issueToClose.number, - body: 'Closed duplicate issue.', - }); - - counter++; - console.log(`Closed issue #${issueToClose.number}: ${issueToClose.html_url}`); + await octokit.request('PATCH /repos/{owner}/{repo}/issues/{issue_number}', { /* eslint-disable-line no-await-in-loop */ + owner, + repo, + issue_number: issue.number, + state: 'closed', + }); + + await octokit.request('POST /repos/{owner}/{repo}/issues/{issue_number}/comments', { /* eslint-disable-line no-await-in-loop */ + owner, + repo, + issue_number: issue.number, + body: `Closing duplicate issue. 
Original issue: #${originalIssue.number}`, + }); + + counter++; + console.log(`Closed issue #${issue.number}: ${issue.html_url}`); + } } - console.log(`Removed ${counter} issues`); - console.log('Duplicate removal process completed'); + console.log(`\nDuplicate removal process completed; ${counter} issues closed`); } catch (error) { console.log(`Failed to remove duplicate issues: ${error.stack}`); process.exit(1); From 24a8c01625f9bbddc518c9db03d06fcb1e784079 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 29 Oct 2024 14:08:52 +0100 Subject: [PATCH 3/9] Add changelog entry --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1271699b..f6eb57626 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased [minor] + +> Development of this release was supported by the [French Ministry for Foreign Affairs](https://www.diplomatie.gouv.fr/fr/politique-etrangere-de-la-france/diplomatie-numerique/) through its ministerial [State Startups incubator](https://beta.gouv.fr/startups/open-terms-archive.html) under the aegis of the Ambassador for Digital Affairs. 
+ +### Added + +- Add script to remove duplicate issues in GitHub reports + ## 2.4.0 - 2024-10-24 _Full changeset and discussions: [#1114](https://github.com/OpenTermsArchive/engine/pull/1114)._ From 1eace8eb7b1e4541ad13bdc307e04a2af0bda2db Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 29 Oct 2024 14:32:00 +0100 Subject: [PATCH 4/9] Update scripts/reporter/duplicate/index.js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Biron --- scripts/reporter/duplicate/index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/reporter/duplicate/index.js b/scripts/reporter/duplicate/index.js index 7d3bde0a4..743feef94 100644 --- a/scripts/reporter/duplicate/index.js +++ b/scripts/reporter/duplicate/index.js @@ -5,11 +5,11 @@ import { Octokit } from 'octokit'; async function removeDuplicateIssues() { try { const repository = config.get('@opentermsarchive/engine.reporter.githubIssues.repositories.declarations'); - const [ owner, repo ] = repository.split('/'); - if (!repository) { throw new Error('Repository configuration is not set'); } + + const [ owner, repo ] = repository.split('/'); const octokit = new Octokit({ auth: process.env.OTA_ENGINE_GITHUB_TOKEN }); From 5f02583ad627c6d97915b3b455a5cc111125e735 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 29 Oct 2024 14:39:12 +0100 Subject: [PATCH 5/9] Improve configuration checks --- scripts/reporter/duplicate/index.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/reporter/duplicate/index.js b/scripts/reporter/duplicate/index.js index 743feef94..c73c658af 100644 --- a/scripts/reporter/duplicate/index.js +++ b/scripts/reporter/duplicate/index.js @@ -5,10 +5,11 @@ import { Octokit } from 'octokit'; async function removeDuplicateIssues() { try { const repository = config.get('@opentermsarchive/engine.reporter.githubIssues.repositories.declarations'); - if (!repository) { - throw new 
Error('Repository configuration is not set'); + + if (!repository.includes('/') || repository.includes('https://')) { + throw new Error(`Configuration entry "reporter.githubIssues.repositories.declarations" is expected to be a string in the format <owner>/<repo>, but received: "${repository}"`); } - + const [ owner, repo ] = repository.split('/'); const octokit = new Octokit({ auth: process.env.OTA_ENGINE_GITHUB_TOKEN }); From 8ea50a7455ca445243417797dee7568d9284418a Mon Sep 17 00:00:00 2001 From: Matti Schneider Date: Tue, 29 Oct 2024 15:27:17 +0100 Subject: [PATCH 6/9] Improve copywriting --- scripts/reporter/duplicate/README.md | 4 ++-- scripts/reporter/duplicate/index.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/reporter/duplicate/README.md b/scripts/reporter/duplicate/README.md index a7b96fb79..c38ba5c47 100644 --- a/scripts/reporter/duplicate/README.md +++ b/scripts/reporter/duplicate/README.md @@ -1,12 +1,12 @@ # Duplicate issues removal script -This script helps remove duplicate issues from a GitHub repository by closing newer duplicate issues. +This script helps remove duplicate issues from a GitHub repository by closing issues that have the same title as any older issue. ## Prerequisites 1. Set up environment variables: - Create a `.env` file in the root directory - - Add the GitHub personal access token of the bot that manage issues on your collection with repo permissions: + - Add the GitHub personal access token of the bot that manages issues on your collection, with `repo` permissions: ``` OTA_ENGINE_GITHUB_TOKEN=your_github_token ``` diff --git a/scripts/reporter/duplicate/index.js b/scripts/reporter/duplicate/index.js index c73c658af..07f83ee93 100644 --- a/scripts/reporter/duplicate/index.js +++ b/scripts/reporter/duplicate/index.js @@ -60,7 +60,7 @@ async function removeDuplicateIssues() { owner, repo, issue_number: issue.number, - body: `Closing duplicate issue. 
Original issue: #${originalIssue.number}`, + body: `This issue is detected as duplicate as it has the same title as #${originalIssue.number}. It most likely was created accidentally by an engine older than [v2.3.2](https://github.com/OpenTermsArchive/engine/releases/tag/v2.3.2). Closing automatically.`, }); counter++; From 317eaf65bb924c8e1cc88808425c6cf4b7956e44 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 29 Oct 2024 15:46:27 +0100 Subject: [PATCH 7/9] Improve readme --- scripts/reporter/duplicate/README.md | 40 +++++++++++++++------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/scripts/reporter/duplicate/README.md b/scripts/reporter/duplicate/README.md index c38ba5c47..f0808d633 100644 --- a/scripts/reporter/duplicate/README.md +++ b/scripts/reporter/duplicate/README.md @@ -7,29 +7,31 @@ This script helps remove duplicate issues from a GitHub repository by closing is 1. Set up environment variables: - Create a `.env` file in the root directory - Add the GitHub personal access token of the bot that manages issues on your collection, with `repo` permissions: - ``` - OTA_ENGINE_GITHUB_TOKEN=your_github_token - ``` - -2. Configure the target repository in `config/development.json`: - ```json - { - "@opentermsarchive/engine": { - "reporter": { - "githubIssues": { - "repositories": { - "declarations": "owner/repository" - } - } - } - } - } - ``` + +```shell +OTA_ENGINE_GITHUB_TOKEN=your_github_token +``` + +2. 
Configure the target repository in your chosen configuration file within the `config` folder: + +```json +{ + "@opentermsarchive/engine": { + "reporter": { + "githubIssues": { + "repositories": { + "declarations": "owner/repository" + } + } + } + } +} +``` ## Usage Run the script using: -``` +```shell node scripts/reporter/duplicate/index.js ``` From 903f399f72c5465c77b2b329f1e0d5c43448aeab Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 29 Oct 2024 15:46:36 +0100 Subject: [PATCH 8/9] Remove obsolete try/catch block --- scripts/reporter/duplicate/index.js | 103 +++++++++++++--------------- 1 file changed, 49 insertions(+), 54 deletions(-) diff --git a/scripts/reporter/duplicate/index.js b/scripts/reporter/duplicate/index.js index 07f83ee93..12ede9e67 100644 --- a/scripts/reporter/duplicate/index.js +++ b/scripts/reporter/duplicate/index.js @@ -3,76 +3,71 @@ import config from 'config'; import { Octokit } from 'octokit'; async function removeDuplicateIssues() { - try { - const repository = config.get('@opentermsarchive/engine.reporter.githubIssues.repositories.declarations'); + const repository = config.get('@opentermsarchive/engine.reporter.githubIssues.repositories.declarations'); - if (!repository.includes('/') || repository.includes('https://')) { - throw new Error(`Configuration entry "reporter.githubIssues.repositories.declarations" is expected to be a string in the format <owner>/<repo>, but received: "${repository}"`); - } + if (!repository.includes('/') || repository.includes('https://')) { + throw new Error(`Configuration entry "reporter.githubIssues.repositories.declarations" is expected to be a string in the format <owner>/<repo>, but received: "${repository}"`); + } - const [ owner, repo ] = repository.split('/'); + const [ owner, repo ] = repository.split('/'); - const octokit = new Octokit({ auth: process.env.OTA_ENGINE_GITHUB_TOKEN }); + const octokit = new Octokit({ auth: process.env.OTA_ENGINE_GITHUB_TOKEN }); - console.log(`Getting issues from 
repository 
${repository}…`); + console.log(`Getting issues from repository ${repository}…`); - const issues = await octokit.paginate('GET /repos/{owner}/{repo}/issues', { - owner, - repo, - state: 'open', - per_page: 100, - }); + const issues = await octokit.paginate('GET /repos/{owner}/{repo}/issues', { + owner, + repo, + state: 'open', + per_page: 100, + }); - const onlyIssues = issues.filter(issue => !issue.pull_request); - const issuesByTitle = new Map(); - let counter = 0; + const onlyIssues = issues.filter(issue => !issue.pull_request); + const issuesByTitle = new Map(); + let counter = 0; - console.log(`Found ${onlyIssues.length} issues`); + console.log(`Found ${onlyIssues.length} issues`); - for (const issue of onlyIssues) { - if (!issuesByTitle.has(issue.title)) { - issuesByTitle.set(issue.title, [issue]); - } else { - issuesByTitle.get(issue.title).push(issue); - } + for (const issue of onlyIssues) { + if (!issuesByTitle.has(issue.title)) { + issuesByTitle.set(issue.title, [issue]); + } else { + issuesByTitle.get(issue.title).push(issue); } + } - for (const [ title, duplicateIssues ] of issuesByTitle) { - if (duplicateIssues.length === 1) continue; - - const originalIssue = duplicateIssues.reduce((oldest, current) => (new Date(current.created_at) < new Date(oldest.created_at) ? current : oldest)); - - console.log(`\nFound ${duplicateIssues.length - 1} duplicates for issue #${originalIssue.number} "${title}"`); - - for (const issue of duplicateIssues) { - if (issue.number === originalIssue.number) { - continue; - } + for (const [ title, duplicateIssues ] of issuesByTitle) { + if (duplicateIssues.length === 1) continue; - await octokit.request('PATCH /repos/{owner}/{repo}/issues/{issue_number}', { /* eslint-disable-line no-await-in-loop */ - owner, - repo, - issue_number: issue.number, - state: 'closed', - }); + const originalIssue = duplicateIssues.reduce((oldest, current) => (new Date(current.created_at) < new Date(oldest.created_at) ? 
current : oldest)); - await octokit.request('POST /repos/{owner}/{repo}/issues/{issue_number}/comments', { /* eslint-disable-line no-await-in-loop */ - owner, - repo, - issue_number: issue.number, - body: `This issue is detected as duplicate as it has the same title as #${originalIssue.number}. It most likely was created accidentally by an engine older than [v2.3.2](https://github.com/OpenTermsArchive/engine/releases/tag/v2.3.2). Closing automatically.`, - }); + console.log(`\nFound ${duplicateIssues.length - 1} duplicates for issue #${originalIssue.number} "${title}"`); - counter++; - console.log(`Closed issue #${issue.number}: ${issue.html_url}`); + for (const issue of duplicateIssues) { + if (issue.number === originalIssue.number) { + continue; } - } - console.log(`\nDuplicate removal process completed; ${counter} issues closed`); - } catch (error) { - console.log(`Failed to remove duplicate issues: ${error.stack}`); - process.exit(1); + await octokit.request('PATCH /repos/{owner}/{repo}/issues/{issue_number}', { /* eslint-disable-line no-await-in-loop */ + owner, + repo, + issue_number: issue.number, + state: 'closed', + }); + + await octokit.request('POST /repos/{owner}/{repo}/issues/{issue_number}/comments', { /* eslint-disable-line no-await-in-loop */ + owner, + repo, + issue_number: issue.number, + body: `This issue is detected as duplicate as it has the same title as #${originalIssue.number}. It most likely was created accidentally by an engine older than [v2.3.2](https://github.com/OpenTermsArchive/engine/releases/tag/v2.3.2). 
Closing automatically.`, + }); + + counter++; + console.log(`Closed issue #${issue.number}: ${issue.html_url}`); + } } + + console.log(`\nDuplicate removal process completed; ${counter} issues closed`); } removeDuplicateIssues(); From 4cdde4bf4d5c68126beea8bba239c67b750a0f51 Mon Sep 17 00:00:00 2001 From: Open Terms Archive Release Bot Date: Tue, 29 Oct 2024 15:33:16 +0000 Subject: [PATCH 9/9] Release v2.5.0 --- CHANGELOG.md | 4 +++- package-lock.json | 4 ++-- package.json | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6eb57626..a6dc587c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,9 @@ All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased [minor] +## 2.5.0 - 2024-10-29 + +_Full changeset and discussions: [#1115](https://github.com/OpenTermsArchive/engine/pull/1115)._ > Development of this release was supported by the [French Ministry for Foreign Affairs](https://www.diplomatie.gouv.fr/fr/politique-etrangere-de-la-france/diplomatie-numerique/) through its ministerial [State Startups incubator](https://beta.gouv.fr/startups/open-terms-archive.html) under the aegis of the Ambassador for Digital Affairs. 
diff --git a/package-lock.json b/package-lock.json index b70bf1a73..5d7c06bcd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@opentermsarchive/engine", - "version": "2.4.0", + "version": "2.5.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@opentermsarchive/engine", - "version": "2.4.0", + "version": "2.5.0", "license": "EUPL-1.2", "dependencies": { "@accordproject/markdown-cicero": "^0.15.2", diff --git a/package.json b/package.json index b8b878a9e..70a49792d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@opentermsarchive/engine", - "version": "2.4.0", + "version": "2.5.0", "description": "Tracks and makes visible changes to the terms of online services", "homepage": "https://opentermsarchive.org", "bugs": {