diff --git a/config/default.json b/config/default.json index 0d3d82eac..c65af2d44 100644 --- a/config/default.json +++ b/config/default.json @@ -56,9 +56,22 @@ "updateTemplateId": 7 } }, + "reporter": { + "githubIssues": { + "repositories": { + "declarations": "OpenTermsArchive/contrib-declarations" + } + }, + "gitlabIssues": { + "repositories": { + "declarations": "p2b/contrib-declarations" + } + } + }, "dataset": { "title": "sandbox", "versionsRepositoryURL": "https://github.com/OpenTermsArchive/sandbox", + "versionsRepositoryURLGitLab": "https://gitlab.com/p2b/contrib-versions", "publishingSchedule": "30 8 * * MON" } } diff --git a/env.example b/env.example new file mode 100644 index 000000000..4e44f1e64 --- /dev/null +++ b/env.example @@ -0,0 +1,3 @@ +GITHUB_TOKEN= +GITLAB_TOKEN= +GITLAB_RELEASES_TOKEN= diff --git a/package-lock.json b/package-lock.json index 7fb266632..dfed6a315 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18,6 +18,7 @@ "ajv": "^6.12.6", "archiver": "^5.3.0", "async": "^3.2.2", + "axios": "^1.7.2", "chai": "^4.3.4", "chai-as-promised": "^7.1.1", "chai-exclude": "^2.1.0", @@ -171,6 +172,14 @@ "npm": ">=6" } }, + "node_modules/@accordproject/concerto-util/node_modules/axios": { + "version": "0.23.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.23.0.tgz", + "integrity": "sha512-NmvAE4i0YAv5cKq8zlDoPd1VLKAqX5oLuZKs8xkJa4qi6RGn0uhCYFjWtHHC9EM/MwOwYWOs53W+V0aqEXq1sg==", + "dependencies": { + "follow-redirects": "^1.14.4" + } + }, "node_modules/@accordproject/markdown-cicero": { "version": "0.15.2", "resolved": "https://registry.npmjs.org/@accordproject/markdown-cicero/-/markdown-cicero-0.15.2.tgz", @@ -324,6 +333,14 @@ "npm": ">=6" } }, + "node_modules/@accordproject/markdown-pdf/node_modules/axios": { + "version": "0.23.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.23.0.tgz", + "integrity": "sha512-NmvAE4i0YAv5cKq8zlDoPd1VLKAqX5oLuZKs8xkJa4qi6RGn0uhCYFjWtHHC9EM/MwOwYWOs53W+V0aqEXq1sg==", + "dependencies": { + "follow-redirects": "^1.14.4" + } + }, "node_modules/@accordproject/markdown-pdf/node_modules/pdfjs-dist": { "version": "2.13.216", "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-2.13.216.tgz", @@ -3964,11 +3981,13 @@ } }, "node_modules/axios": { - "version": "0.23.0", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.23.0.tgz", - "integrity": "sha512-NmvAE4i0YAv5cKq8zlDoPd1VLKAqX5oLuZKs8xkJa4qi6RGn0uhCYFjWtHHC9EM/MwOwYWOs53W+V0aqEXq1sg==", + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.2.tgz", + "integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==", "dependencies": { - "follow-redirects": "^1.14.4" + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" } }, "node_modules/b4a": { @@ -6478,9 +6497,9 @@ "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" }, "node_modules/follow-redirects": { - "version": "1.15.3", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.3.tgz", - "integrity": "sha512-1VzOtuEM8pC9SFU1E+8KfTjZyMztRsgEfwQl44z8A25uy13jSzTj6dyK2Df52iV0vgHCfBwLhDWevLn95w5v6Q==", + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", "funding": [ { "type": "individual", diff --git a/package.json b/package.json index 8fccde58e..8da099a32 100644 --- a/package.json +++ b/package.json @@ -100,7 +100,8 @@ "swagger-jsdoc": "^6.2.8", "swagger-ui-express": "^5.0.0", "winston": "^3.3.3", - "winston-mail": "^2.0.0" + "winston-mail": "^2.0.0", + "axios": "^1.7.2" }, "devDependencies": { "@commitlint/cli": "^19.0.3", diff --git a/scripts/dataset/assets/README.templateGitLab.js b/scripts/dataset/assets/README.templateGitLab.js new file mode 100644 index 000000000..4fbb6d090 --- /dev/null +++ b/scripts/dataset/assets/README.templateGitLab.js @@ -0,0 +1,65 @@ +import config from 'config'; + +const LOCALE = 'en-EN'; +const DATE_OPTIONS = { year: 'numeric', month: 'long', day: 'numeric' }; + +export default function readme({ releaseDate, servicesCount, firstVersionDate, lastVersionDate }) { + return `# Open Terms Archive — ${title({ releaseDate })} + +${body({ servicesCount, firstVersionDate, lastVersionDate })}`; +} + +export function title({ releaseDate }) { + releaseDate = releaseDate.toLocaleDateString(LOCALE, DATE_OPTIONS); + + const title = config.get('dataset.title'); + + return `${title} — ${releaseDate} dataset`; +} + +export function body({ servicesCount, firstVersionDate, lastVersionDate }) { + firstVersionDate = firstVersionDate.toLocaleDateString(LOCALE, DATE_OPTIONS); + lastVersionDate = lastVersionDate.toLocaleDateString(LOCALE, DATE_OPTIONS); + + const versionsRepositoryURLGitLab = config.get('dataset.versionsRepositoryURLGitLab'); + + return `This dataset consolidates the contractual documents of ${servicesCount} service providers, in all their versions that were accessible online between ${firstVersionDate} and ${lastVersionDate}. + +This dataset is tailored for datascientists and other analysts. You can also explore all these versions interactively on [${versionsRepositoryURLGitLab}](${versionsRepositoryURLGitLab}). + +It has been generated with [Open Terms Archive](https://opentermsarchive.org). + +### Dataset format + +This dataset represents each version of a document as a separate [Markdown](https://spec.commonmark.org/0.30/) file, nested in a directory with the name of the service provider and in a directory with the name of the terms type. The filesystem layout will look like below. + +\`\`\` +├ README.md +├┬ Service provider 1 (e.g. Facebook) +│├┬ Terms type 1 (e.g. Terms of Service) +││├ YYYY-DD-MMTHH-MM-SSZ.md (e.g. 2021-08-01T01-03-12Z.md) +┆┆┆ +││└ YYYY-DD-MMTHH-MM-SSZ.md (e.g. 2021-10-03T08-12-25Z.md) +┆┆ +│└┬ Terms type X (e.g. Privacy Policy) +│ ├ YYYY-DD-MMTHH-MM-SSZ.md (e.g. 2021-05-02T03-02-15Z.md) +┆ ┆ +│ └ YYYY-DD-MMTHH-MM-SSZ.md (e.g. 2021-11-14T12-36-45Z.md) +┆ +└┬ Service provider Y (e.g. Google) + ├┬ Terms type 1 (e.g. Developer Terms) + │├ YYYY-DD-MMTHH-MM-SSZ.md (e.g. 2019-03-12T04-18-22Z.md) + ┆┆ + │└ YYYY-DD-MMTHH-MM-SSZ.md (e.g. 2021-12-04T22-47-05Z.md) + └┬ Terms type Z (e.g. Privacy Policy) + ┆ + ├ YYYY-DD-MMTHH-MM-SSZ.md (e.g. 2021-05-02T03-02-15Z.md) + ┆ + └ YYYY-DD-MMTHH-MM-SSZ.md (e.g. 2021-11-14T12-36-45Z.md) +\`\`\` + +### License + +This dataset is made available under an [Open Database (OdBL) License](https://opendatacommons.org/licenses/odbl/1.0/) by Open Terms Archive Contributors. +`; +} diff --git a/scripts/dataset/index.js b/scripts/dataset/index.js index 4c739686d..61db81c8b 100644 --- a/scripts/dataset/index.js +++ b/scripts/dataset/index.js @@ -6,10 +6,11 @@ import config from 'config'; import generateRelease from './export/index.js'; import logger from './logger/index.js'; import publishRelease from './publish/index.js'; +import publishReleaseGitLab from './publishGitLab/index.js'; export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }) { const releaseDate = new Date(); - const archiveName = fileName || `dataset-${config.get('@opentermsarchive/engine.dataset.title')}-${releaseDate.toISOString().replace(/T.*/, '')}`; + const archiveName = fileName || `dataset-${config.get('dataset.title')}-${releaseDate.toISOString().replace(/T.*/, '')}`; const archivePath = `${path.basename(archiveName, '.zip')}.zip`; // allow to pass filename or filename.zip as the archive name and have filename.zip as the result name logger.info('Start exporting dataset…'); @@ -24,13 +25,25 @@ export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName } logger.info('Start publishing dataset…'); - const releaseUrl = await publishRelease({ - archivePath, - releaseDate, - stats, - }); + if (typeof process.env.GITHUB_TOKEN !== 'undefined') { + const releaseUrl = await publishRelease({ + archivePath, + releaseDate, + stats, + }); - logger.info(`Dataset published to ${releaseUrl}`); + logger.info(`Dataset published to ${releaseUrl}`); + } + + if (typeof process.env.GITLAB_RELEASES_TOKEN !== 'undefined') { + const releaseUrl = await publishReleaseGitLab({ + archivePath, + releaseDate, + stats, + }); + + logger.info(`Dataset published to ${releaseUrl}`); + } if (!shouldRemoveLocalCopy) { return; diff --git a/scripts/dataset/publishGitLab/index.js b/scripts/dataset/publishGitLab/index.js new file mode 100644 index 000000000..48b7e8d64 --- /dev/null +++ b/scripts/dataset/publishGitLab/index.js @@ -0,0 +1,102 @@ +import fsApi from 'fs'; +import path from 'path'; +import url from 'url'; + +import axios from 'axios'; + +import config from 'config'; +import dotenv from 'dotenv'; +//import { Octokit } from 'octokit'; + +import FormData from 'form-data'; + +import * as readme from '../assets/README.templateGitLab.js'; + +dotenv.config(); + +const gitlabAPIUrl = "https://gitlab.com/api/v4"; +const gitlabUrl = "https://gitlab.com"; + +export default async function publishReleaseGitLab({ + archivePath, + releaseDate, + stats, +}) { + let projectId = null; + + // const octokit = new Octokit({ auth: process.env.GITHUB_TOKEN }); + + const [owner, repo] = url + .parse(config.get('dataset.versionsRepositoryURLGitLab')) + .pathname.split('/') + .filter((component) => component); + const commonParams = { owner, repo }; + + try { + const repositoryPath = `${commonParams.owner}/${commonParams.repo}`; + const response = await axios.get( + `${gitlabAPIUrl}/projects/${encodeURIComponent(repositoryPath)}`, + { + headers: { + Authorization: `Bearer ${process.env.GITLAB_RELEASES_TOKEN}`, + }, + }, + ); + projectId = response.data.id; + } catch (error) { + //logger.error(`🤖 Error while obtaining projectId: ${error}`); + projectId = null; + } + + const tagName = `${path.basename(archivePath, path.extname(archivePath))}`; // use archive filename as Git tag + + try { + // First, create the release + const releaseResponse = await axios.post( + `${gitlabAPIUrl}/projects/${projectId}/releases`, + { + ref: 'main', + tag_name: tagName, + name: readme.title({ releaseDate }), + description: readme.body(stats), + }, + { + headers: { + Authorization: `Bearer ${process.env.GITLAB_RELEASES_TOKEN}`, + 'Content-Type': 'application/json', + }, + }, + ); + + const releaseId = releaseResponse.data.commit.id; + + // Then, upload the ZIP file as an asset to the release + const formData = new FormData(); + formData.append('name', archivePath); + formData.append( + 'url', + `${gitlabUrl}/${commonParams.owner}/${commonParams.repo}/-/archive/${tagName}/${archivePath}`, + ); + formData.append('file', fsApi.createReadStream(archivePath), { + filename: path.basename(archivePath), + }); + + const uploadResponse = await axios.post( + `${gitlabAPIUrl}/projects/${projectId}/releases/${tagName}/assets/links`, + formData, + { + headers: { + ...formData.getHeaders(), + Authorization: `Bearer ${process.env.GITLAB_RELEASES_TOKEN}`, + }, + }, + ); + + const releaseUrl = uploadResponse.data.direct_asset_url; + + return releaseUrl; + } catch (error) { + console.error('Failed to create release or upload ZIP file:', error); + throw error; + } +} diff --git a/src/index.js b/src/index.js index 2083e1802..157f6d22d 100644 --- a/src/index.js +++ b/src/index.js @@ -1,29 +1,24 @@ -import { createRequire } from 'module'; - import config from 'config'; import cron from 'croner'; -import cronstrue from 'cronstrue'; import Archivist from './archivist/index.js'; import logger from './logger/index.js'; import Notifier from './notifier/index.js'; import Reporter from './reporter/index.js'; - -const require = createRequire(import.meta.url); +import ReporterGitlab from './reporterGitlab/index.js'; export default async function track({ services, types, extractOnly, schedule }) { const archivist = new Archivist({ - recorderConfig: config.get('@opentermsarchive/engine.recorder'), - fetcherConfig: config.get('@opentermsarchive/engine.fetcher'), + recorderConfig: config.get('recorder'), + fetcherConfig: config.get('fetcher'), }); archivist.attach(logger); await archivist.initialize(); - const { version } = require('../package.json'); - - logger.info(`Start Open Terms Archive engine v${version}\n`); + console.log('Running from src'); + logger.info('Start Open Terms Archive\n'); if (services?.length) { services = services.filter(serviceId => { @@ -45,31 +40,36 @@ export default async function track({ services, types, extractOnly, schedule }) return; } - if (process.env.OTA_ENGINE_SENDINBLUE_API_KEY) { - try { - archivist.attach(new Notifier(archivist.services)); - } catch (error) { - logger.error('Cannot instantiate the Notifier module; it will be ignored:', error); - } + if (process.env.SENDINBLUE_API_KEY) { + archivist.attach(new Notifier(archivist.services)); } else { - logger.warn('Environment variable "OTA_ENGINE_SENDINBLUE_API_KEY" was not found; the Notifier module will be ignored'); + logger.warn('Environment variable "SENDINBLUE_API_KEY" was not found; the Notifier module will be ignored'); } - if (process.env.OTA_ENGINE_GITHUB_TOKEN) { - if (config.has('@opentermsarchive/engine.reporter.githubIssues.repositories.declarations')) { - try { - const reporter = new Reporter(config.get('@opentermsarchive/engine.reporter')); + if (process.env.GITHUB_TOKEN) { + if (config.has('reporter.githubIssues.repositories.declarations')) { + const reporter = new Reporter(config.get('reporter')); - await reporter.initialize(); - archivist.attach(reporter); - } catch (error) { - logger.error('Cannot instantiate the Reporter module; it will be ignored:', error); - } + await reporter.initialize(); + archivist.attach(reporter); } else { logger.warn('Configuration key "reporter.githubIssues.repositories.declarations" was not found; issues on the declarations repository cannot be created'); } } else { - logger.warn('Environment variable "OTA_ENGINE_GITHUB_TOKEN" was not found; the Reporter module will be ignored'); + logger.warn('Environment variable "GITHUB_TOKEN" was not found; the Reporter module will be ignored'); + } + + if (process.env.GITLAB_TOKEN) { + if (config.has('reporter.gitlabIssues.repositories.declarations')) { + const reporter = new ReporterGitlab(config.get('reporter')); + + await reporter.initialize(); + archivist.attach(reporter); + } else { + logger.warn('Configuration key "reporter.gitlabIssues.repositories.declarations" was not found; issues on the declarations repository cannot be created'); + } + } else { + logger.warn('Environment variable "GITLAB_TOKEN" was not found; the ReporterGitlab module will be ignored'); } if (!schedule) { @@ -78,15 +78,8 @@ export default async function track({ services, types, extractOnly, schedule }) return; } - const trackingSchedule = config.get('@opentermsarchive/engine.trackingSchedule'); - const humanReadableSchedule = cronstrue.toString(trackingSchedule); - logger.info('The scheduler is running…'); - logger.info(`Terms will be tracked ${humanReadableSchedule.toLowerCase()} in the timezone of this machine`); + logger.info('Terms will be tracked every six hours starting at half past midnight'); - cron( - trackingSchedule, - { protect: job => logger.warn(`Tracking scheduled at ${new Date().toISOString()} were blocked by an unfinished tracking started at ${job.currentRun().toISOString()}`) }, - () => archivist.track({ services, types }), - ); + cron('30 */6 * * *', () => archivist.track({ services, types })); } diff --git a/src/reporterGitlab/gitlab.js b/src/reporterGitlab/gitlab.js new file mode 100644 index 000000000..3e9b069f6 --- /dev/null +++ b/src/reporterGitlab/gitlab.js @@ -0,0 +1,324 @@ +import { createRequire } from 'module'; + +import logger from '../logger/index.js'; + +const require = createRequire(import.meta.url); + +export const MANAGED_BY_OTA_MARKER = '[managed by OTA]'; + +const gitlabUrl = "https://gitlab.com/api/v4"; + +export default class GitLab { + static ISSUE_STATE_CLOSED = 'closed'; + static ISSUE_STATE_OPEN = 'opened'; + static ISSUE_STATE_ALL = 'all'; + + constructor(repository) { + //const { version } = require('../../package.json'); + + const [owner, repo] = repository.split('/'); + + this.commonParams = { owner, repo }; + } + + async initialize() { + const axios = require('axios'); + + try { + const repositoryPath = `${this.commonParams.owner}/${this.commonParams.repo}`; + const response = await axios.get( + `${gitlabUrl}/projects/${encodeURIComponent(repositoryPath)}`, + { + headers: { + Authorization: `Bearer ${process.env.GITLAB_TOKEN}`, + }, + }, + ); + this.projectId = response.data.id; + } catch (error) { + logger.error(`🤖 Error while obtaining projectId: ${error}`); + this.projectId = null; + } + this.MANAGED_LABELS = require('./labels.json'); + + const existingLabels = await this.getRepositoryLabels(); + const existingLabelsNames = existingLabels.map((label) => label.name); + const missingLabels = this.MANAGED_LABELS.filter( + (label) => !existingLabelsNames.includes(label.name), + ); + + if (missingLabels.length) { + logger.info( + `🤖 Following required labels are not present on the repository: ${missingLabels.map((label) => `"${label.name}"`).join(', ')}. Creating them…`, + ); + + for (const label of missingLabels) { + await this.createLabel({ + /* eslint-disable-line no-await-in-loop */ name: label.name, + color: label.color, + description: `${label.description} ${MANAGED_BY_OTA_MARKER}`, + }); + } + } + } + + async getRepositoryLabels() { + try { + const response = await fetch( + `${gitlabUrl}/projects/${this.projectId}/labels?with_counts=true`, + { + method: 'GET', + headers: { + Authorization: `Bearer ${process.env.GITLAB_TOKEN}`, + }, + }, + ); + if (response.status == 200) { + const labels = response.json(); + return labels; + } else { + logger.error( + `🤖 Failed to get labels: {response.status_code} - {response.text}`, + ); + return null; + } + } catch (error) { + logger.error(`🤖 Could get labels: ${error}`); + } + } + + async createLabel({ name, color, description }) { + const axios = require('axios'); + + try { + const label = { + name: name, + color: color, + description: description, + }; + const response = await axios.post( + `${gitlabUrl}/projects/${this.projectId}/labels`, + label, + { + headers: { + Authorization: `Bearer ${process.env.GITLAB_TOKEN}`, + 'Content-Type': 'application/json', + }, + }, + ); + logger.info(`🤖 New label created: ${response.data.name}`); + } catch (error) { + logger.error(`🤖 Failed to create label: ${error}`); + } + } + + async createIssue({ title, description, labels }) { + const axios = require('axios'); + + try { + const issue = { + title: title, + labels: labels, + description: description, + }; + const response = await axios.post( + `${gitlabUrl}/projects/${this.projectId}/issues`, + issue, + { + headers: { + Authorization: `Bearer ${process.env.GITLAB_TOKEN}`, + 'Content-Type': 'application/json', + }, + }, + ); + logger.info( + `🤖 Created GitLab issue #${response.data.iid} "${title}": ${response.data.web_url}`, + ); + + return response; + } catch (error) { + logger.error(`🤖 Could not create GitLab issue "${title}": ${error}`); + } + } + + async setIssueLabels({ issue, labels }) { + const axios = require('axios'); + + try { + const newLabels = { + labels: labels, + }; + const response = await axios.put( + `${gitlabUrl}/projects/${this.projectId}/issues/${issue.iid}`, + newLabels, + { + headers: { + Authorization: `Bearer ${process.env.GITLAB_TOKEN}`, + 'Content-Type': 'application/json', + }, + }, + ); + + logger.info(`🤖 Updated labels to GitLab issue #${issue.iid}`); + } catch (error) { + logger.error( + `🤖 Could not update GitLab issue #${issue.iid} "${issue.title}": ${error}`, + ); + } + } + + async openIssue(issue) { + const axios = require('axios'); + + try { + const updateIssue = { + state_event: 'reopen', + }; + const response = await axios.put( + `${gitlabUrl}/projects/${this.projectId}/issues/${issue.iid}`, + updateIssue, + { + headers: { + Authorization: `Bearer ${process.env.GITLAB_TOKEN}`, + 'Content-Type': 'application/json', + }, + }, + ); + + logger.info(`🤖 Opened GitLab issue #${issue.iid}`); + } catch (error) { + logger.error( + `🤖 Could not update GitLab issue #${issue.iid} "${issue.title}": ${error}`, + ); + } + } + + async closeIssue(issue) { + const axios = require('axios'); + + try { + const updateIssue = { + state_event: 'close', + }; + const response = await axios.put( + `${gitlabUrl}/projects/${this.projectId}/issues/${issue.iid}`, + updateIssue, + { + headers: { + Authorization: `Bearer ${process.env.GITLAB_TOKEN}`, + 'Content-Type': 'application/json', + }, + }, + ); + + logger.info(`🤖 Closed GitLab issue #${issue.iid}`); + } catch (error) { + logger.error( + `🤖 Could not update GitLab issue #${issue.iid} "${issue.title}": ${error}`, + ); + } + } + + async getIssue({ title, ...searchParams }) { + const axios = require('axios'); + + try { + let apiUrl = `${gitlabUrl}/projects/${this.projectId}/issues?state=${searchParams.state}&per_page=100`; + if (searchParams.state == 'all') + apiUrl = `${gitlabUrl}/projects/${this.projectId}/issues?per_page=100`; + apiUrl = `${gitlabUrl}/projects/${this.projectId}/issues?search=${encodeURIComponent(title)}&per_page=100`; + const response = await axios.get(apiUrl, { + headers: { + Authorization: `Bearer ${process.env.GITLAB_TOKEN}`, + }, + }); + const issues = response.data; + + const [issue] = issues.filter((item) => item.title === title); // since only one is expected, use the first one + + setTimeout(() => { + console.log(title + ' - ' + apiUrl); + }, 5000); + + return issue; + } catch (error) { + logger.error(`🤖 Could not find GitLab issue "${title}": ${error}`); + } + } + + async addCommentToIssue({ issue, comment }) { + const axios = require('axios'); + const body = { + body: comment, + }; + + try { + const response = await axios.post( + `${gitlabUrl}/projects/${this.projectId}/issues/${issue.iid}/notes`, + body, + { + headers: { + Authorization: `Bearer ${process.env.GITLAB_TOKEN}`, + 'Content-Type': 'application/json', + }, + }, + ); + logger.info( + `🤖 Added comment to GitLab issue #${issue.iid} ${issue.title}: ${response.data.id}`, + ); + + return response.data.body; + } catch (error) { + logger.error( + `🤖 Could not add comment to GitLab issue #${issue.iid} "${issue.title}": ${error}`, + ); + } + } + + async closeIssueWithCommentIfExists({ title, comment }) { + const openedIssue = await this.getIssue({ + title, + state: GitLab.ISSUE_STATE_OPEN, + }); + + if (!openedIssue) { + return; + } + + await this.addCommentToIssue({ issue: openedIssue, comment }); + + return this.closeIssue(openedIssue); + } + + async createOrUpdateIssue({ title, description, label }) { + const issue = await this.getIssue({ title, state: GitLab.ISSUE_STATE_ALL }); + + if (!issue) { + return this.createIssue({ title, description, labels: [label] }); + } + + if (issue.state == GitLab.ISSUE_STATE_CLOSED) { + await this.openIssue(issue); + } + + const managedLabelsNames = this.MANAGED_LABELS.map((label) => label.name); + const [managedLabel] = issue.labels.filter((label) => + managedLabelsNames.includes(label.name), + ); // it is assumed that only one specific reason for failure is possible at a time, making managed labels mutually exclusive + + if (managedLabel?.name == label) { + // if the label is already assigned to the issue, the error is redundant with the one already reported and no further action is necessary + return; + } + + const labelsNotManagedToKeep = issue.labels + .map((label) => label.name) + .filter((label) => !managedLabelsNames.includes(label)); + + await this.setIssueLabels({ + issue, + labels: [label, ...labelsNotManagedToKeep], + }); + await this.addCommentToIssue({ issue, comment: description }); + } +} diff --git a/src/reporterGitlab/index.js b/src/reporterGitlab/index.js new file mode 100644 index 000000000..f91c1f527 --- /dev/null +++ b/src/reporterGitlab/index.js @@ -0,0 +1,147 @@ +import mime from 'mime'; + +import { toISODateWithoutMilliseconds } from '../archivist/utils/date.js'; + +import GitLab from './gitlab.js'; + +const CONTRIBUTION_TOOL_URL = 'https://contribute.opentermsarchive.org/'; +const DOC_URL = 'https://docs.opentermsarchive.org'; +const REPO_URL = 'https://gitlab.com/'; + +const ERROR_MESSAGE_TO_ISSUE_LABEL_MAP = { + 'has no match': 'selectors', + 'HTTP code 404': 'location', + 'HTTP code 403': '403', + 'HTTP code 429': '429', + 'HTTP code 500': '500', + 'HTTP code 502': '502', + 'HTTP code 503': '503', + 'Timed out after': 'timeout', + 'getaddrinfo EAI_AGAIN': 'EAI_AGAIN', + 'getaddrinfo ENOTFOUND': 'ENOTFOUND', + 'Response is empty': 'empty response', + 'unable to verify the first certificate': 'first certificate', + 'certificate has expired': 'certificate expired', + 'maximum redirect reached': 'redirects', +}; + +function getLabelNameFromError(error) { + return ERROR_MESSAGE_TO_ISSUE_LABEL_MAP[Object.keys(ERROR_MESSAGE_TO_ISSUE_LABEL_MAP).find(substring => error.toString().includes(substring))] || 'to clarify'; +} + +// In the following class, it is assumed that each issue is managed using its title as a unique identifier +export default class Reporter { + constructor(config) { + const { repositories } = config.gitlabIssues; + + for (const repositoryType of Object.keys(repositories)) { + if (!repositories[repositoryType].includes('/') || repositories[repositoryType].includes('https://')) { + throw new Error(`Configuration entry "reporter.gitlabIssues.repositories.${repositoryType}" is expected to be a string in the format /, but received: "${repositories[repositoryType]}"`); + } + } + + this.gitlab = new GitLab(repositories.declarations); + this.repositories = repositories; + } + + async initialize() { + return this.gitlab.initialize(); + } + + async onVersionRecorded(version) { + await this.gitlab.closeIssueWithCommentIfExists({ + title: Reporter.generateTitleID(version.serviceId, version.termsType), + comment: `### Tracking resumed + +A new version has been recorded.`, + }); + } + + async onVersionNotChanged(version) { + await this.gitlab.closeIssueWithCommentIfExists({ + title: Reporter.generateTitleID(version.serviceId, version.termsType), + comment: `### Tracking resumed + +No changes were found in the last run, so no new version has been recorded.`, + }); + } + + async onFirstVersionRecorded(version) { + return this.onVersionRecorded(version); + } + + async onInaccessibleContent(error, terms) { + await this.gitlab.createOrUpdateIssue({ + title: Reporter.generateTitleID(terms.service.id, terms.type), + description: this.generateDescription({ error, terms }), + label: getLabelNameFromError(error), + }); + } + + generateDescription({ error, terms }) { + const date = new Date(); + const currentFormattedDate = date.toLocaleDateString('en-GB', { year: 'numeric', month: 'long', day: 'numeric', hour: 'numeric', minute: 'numeric', second: 'numeric', timeZoneName: 'short', timeZone: 'UTC' }); + const validUntil = toISODateWithoutMilliseconds(date); + + const hasSnapshots = terms.sourceDocuments.every(sourceDocument => sourceDocument.snapshotId); + + const contributionToolParams = new URLSearchParams({ + json: JSON.stringify(terms.toPersistence()), + destination: this.repositories.declarations, + step: '2', + }); + const contributionToolUrl = `${CONTRIBUTION_TOOL_URL}?${contributionToolParams}`; + + const latestDeclarationLink = `[Latest declaration](${REPO_URL}/${this.repositories.declarations}/-/blob/main/declarations/${encodeURIComponent(terms.service.name)}.json)`; + const latestVersionLink = `[Latest version](${REPO_URL}/${this.repositories.versions}/-/blob/main/${encodeURIComponent(terms.service.name)}/${encodeURIComponent(terms.type)}.md)`; + const snapshotsBaseUrl = `${REPO_URL}/${this.repositories.snapshots}/-/blob/main/${encodeURIComponent(terms.service.name)}/${encodeURIComponent(terms.type)}`; + const latestSnapshotsLink = terms.hasMultipleSourceDocuments + ? `Latest snapshots:\n - ${terms.sourceDocuments.map(sourceDocument => `[${sourceDocument.id}](${snapshotsBaseUrl}.%20#${sourceDocument.id}.${mime.getExtension(sourceDocument.mimeType)})`).join('\n - ')}` + : `[Latest snapshot](${snapshotsBaseUrl}.${mime.getExtension(terms.sourceDocuments[0].mimeType)})`; + + /* eslint-disable no-irregular-whitespace */ + return ` +### No version of the \`${terms.type}\` of service \`${terms.service.name}\` is recorded anymore since ${currentFormattedDate} + +The source document${terms.hasMultipleSourceDocuments ? 's have' : ' has'}${hasSnapshots ? ' ' : ' not '}been recorded in ${terms.hasMultipleSourceDocuments ? 'snapshots' : 'a snapshot'}, ${hasSnapshots ? 'but ' : 'thus '} no version can be [extracted](${DOC_URL}/#tracking-terms). +${hasSnapshots ? 'After correction, it might still be possible to recover the missed versions.' : ''} + +### What went wrong + +- ${error.reasons.join('\n- ')} + +### How to resume tracking + +First of all, check if the source documents are accessible through a web browser: + +- [ ] ${terms.sourceDocuments.map(sourceDocument => `[${sourceDocument.location}](${sourceDocument.location})`).join('\n- [ ] ')} + +#### If the source documents are accessible through a web browser + +[Edit the declaration](${contributionToolUrl}): +- Try updating the selectors. +- Try switching client scripts on with expert mode. + +#### If the source documents are not accessible anymore + +- If the source documents have moved, find their new location and [update it](${contributionToolUrl}). +- If these terms have been removed, move them from the declaration to its [history file](${DOC_URL}/contributing-terms/#service-history), using \`${validUntil}\` as the \`validUntil\` value. +- If the service has closed, move the entire contents of the declaration to its [history file](${DOC_URL}/contributing-terms/#service-history), using \`${validUntil}\` as the \`validUntil\` value. + +#### If none of the above works + +If the source documents are accessible in a browser but fetching them always fails from the Open Terms Archive server, this is most likely because the service provider has blocked the Open Terms Archive robots from accessing its content. In this case, updating the declaration will not enable resuming tracking. Only an agreement with the service provider, an engine upgrade, or some technical workarounds provided by the administrator of this collection’s server might resume tracking. + +### References + +- ${latestDeclarationLink} +${this.repositories.versions ? `- ${latestVersionLink}` : ''} +${this.repositories.snapshots ? `- ${latestSnapshotsLink}` : ''} +`; + /* eslint-enable no-irregular-whitespace */ + } + + static generateTitleID(serviceId, type) { + return `\`${serviceId}\` ‧ \`${type}\` ‧ not tracked anymore`; + } +} diff --git a/src/reporterGitlab/labels.json b/src/reporterGitlab/labels.json new file mode 100644 index 000000000..8d73f4e8e --- /dev/null +++ b/src/reporterGitlab/labels.json @@ -0,0 +1,77 @@ +[ + { + "name": "403", + "color": "#0b08a0", + "description": "Fetching fails with a 403 (forbidden) HTTP code" + }, + { + "name": "429", + "color": "#0b08a0", + "description": "Fetching fails with a 429 (too many requests) HTTP code" + }, + { + "name": "500", + "color": "#0b08a0", + "description": "Fetching fails with a 500 (internal server error) HTTP code" + }, + { + "name": "502", + "color": "#0b08a0", + "description": "Fetching fails with a 502 (bad gateway) HTTP code" + }, + { + "name": "503", + "color": "#0b08a0", + "description": "Fetching fails with a 503 (service unavailable) HTTP code" + }, + { + "name": "certificate expired", + "color": "#0b08a0", + "description": "Fetching fails because the domain SSL certificate has expired" + }, + { + "name": "EAI_AGAIN", + "color": "#0b08a0", + "description": "Fetching fails because the domain fails to resolve on DNS" + }, + { + "name": "ENOTFOUND", + "color": "#0b08a0", + "description": "Fetching fails because the domain fails to resolve on DNS" + }, + { + "name": "empty response", + "color": "#0b08a0", + "description": "Fetching fails with a “response is empty” error" + }, + { + "name": "first certificate", + "color": "#0b08a0", + "description": "Fetching fails with an “unable to verify the first certificate” error" + }, + { + "name": "redirects", + "color": "#0b08a0", + "description": "Fetching fails with a “too many redirects” error" + }, + { + "name": "timeout", + "color": "#0b08a0", + "description": "Fetching fails with a timeout error" + }, + { + "name": "to clarify", + "color": "#0496ff", + "description": "Default failure label" + }, + { + "name": "selectors", + "color": "#FBCA04", + "description": "Extraction selectors are outdated" + }, + { + "name": "location", + "color": "#FBCA04", + "description": "Fetch location is outdated" + } +] diff --git a/src/reporterGitlab/labels.test.js b/src/reporterGitlab/labels.test.js new file mode 100644 index 000000000..0b4e9f75b --- /dev/null +++ b/src/reporterGitlab/labels.test.js @@ -0,0 +1,30 @@ +import { createRequire } from 'module'; + +import chai from 'chai'; + +import { MANAGED_BY_OTA_MARKER } from './gitlab.js'; + +const require = createRequire(import.meta.url); + +const { expect } = chai; +const labels = require('./labels.json'); + +const GITLAB_LABEL_DESCRIPTION_MAX_LENGTH = 255; + +describe('Reporter GitLab labels', () => { + labels.forEach(label => { + describe(`"${label.name}"`, () => { + it('complies with the GitLab character limit for descriptions', () => { + const descriptionLength = label.description.length + MANAGED_BY_OTA_MARKER.length; + + expect(descriptionLength).to.be.lessThan(GITLAB_LABEL_DESCRIPTION_MAX_LENGTH); + }); + + it('complies with the GitHub constraints for color', () => { + const validHexColorRegex = /^\#[0-9a-fA-F]{6}$/; // Regex for a valid 6-digit hexadecimal color code with the `#` + + expect(validHexColorRegex.test(label.color)).to.be.true; + }); + }); + }); +});