From 9e1bbddf1a1c7aa5e636ed8563a2a72da323bc46 Mon Sep 17 00:00:00 2001 From: Stefan Peters Date: Wed, 21 Feb 2024 10:36:43 +0100 Subject: [PATCH] Extend data script to manage vocabularies for all instances (#4) Filtering is still missing. --- configs/vocabularies.txt | 86 +++++++++++++++++++++++++ src/data.ts | 133 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 211 insertions(+), 8 deletions(-) create mode 100644 configs/vocabularies.txt diff --git a/configs/vocabularies.txt b/configs/vocabularies.txt new file mode 100644 index 0000000..3942583 --- /dev/null +++ b/configs/vocabularies.txt @@ -0,0 +1,86 @@ +# This file lists vocabularies and their data +# that should be imported into our jskos-server instances. + +# Format: +# jskos-server-instance vocabulary-uri-or-path concept-data-url-or-path [concept-data-url-or-path ...] + +# Example: +# jskos-server http://bartoc.org/en/node/1043 thema/thema-concepts-1.4.ndjson + +### Main instance ### + +# Import scheme entries for all coli-conc vocabularies (so that they are supported by mappings and concordances) +jskos-server https://bartoc.org/api/voc?partOf=http%3A%2F%2Fbartoc.org%2Fen%2Fnode%2F18926&limit=500 + +jskos-server http://bartoc.org/en/node/520 dfg/dfg-2020.concepts.ndjson +jskos-server http://bartoc.org/en/node/730 bos/bos-concepts.ndjson +jskos-server http://bartoc.org/en/node/742 oesoeb/oesoeb-concepts.ndjson +jskos-server http://bartoc.org/en/node/1339 htwg/htwg-concepts.ndjson +jskos-server http://bartoc.org/en/node/1042 fos/fos-concepts.ndjson +jskos-server http://bartoc.org/en/node/1043 thema/thema-concepts-1.4.ndjson +jskos-server http://bartoc.org/en/node/1050 skj/skj-concepts.ndjson +jskos-server http://bartoc.org/en/node/1094 oefos/oefos-concepts.ndjson +jskos-server http://bartoc.org/en/node/1232 nomenclature/nomenclature.concepts.ndjson +jskos-server http://bartoc.org/en/node/1324 seb/seb-concepts.ndjson +jskos-server http://bartoc.org/en/node/1822 nomisma/nomisma-concepts.ndjson 
+jskos-server http://bartoc.org/en/node/1986 mv/mv-concepts.ndjson +jskos-server http://bartoc.org/en/node/18797 ixtheo/ixtheo.ndjson +jskos-server http://bartoc.org/en/node/18915 zdb-fgs/zdb-fgs-concepts.ndjson +jskos-server http://bartoc.org/en/node/18920 hochschulfaechersystematik/faechersystematik.concepts.ndjson +jskos-server http://bartoc.org/en/node/18928 ssg/ssg-concepts.ndjson +jskos-server http://bartoc.org/en/node/20049 sdnb/sdnb-concepts.ndjson +jskos-server http://bartoc.org/en/node/20298 nsk/fachgruppen.ndjson nsk/sachschluessel.ndjson +jskos-server http://bartoc.org/en/node/20400 bc/bc-concepts.ndjson +jskos-server http://bartoc.org/en/node/20404 retrohab/retrohab-concepts.ndjson +jskos-server http://bartoc.org/en/node/20405 gessner/gessner-concepts.ndjson +jskos-server http://bartoc.org/en/node/20406 brunfels/brunfels-concepts.ndjson +jskos-server http://bartoc.org/en/node/20407 thuana/thuana-concepts.ndjson +jskos-server http://bartoc.org/en/node/20430 obv/obv-concepts.ndjson +jskos-server http://bartoc.org/en/node/20050 fkdigbib/fkdigbib-concepts.ndjson +jskos-server http://bartoc.org/en/node/1051 ssd/ssd-concepts.ndjson +jskos-server http://bartoc.org/en/node/743 essb/essb-concepts.ndjson +jskos-server http://bartoc.org/en/node/20446 fivs/fivs-concepts.ndjson +jskos-server http://bartoc.org/en/node/20447 fivr/fivr-concepts.ndjson +jskos-server http://bartoc.org/en/node/220 bssc/bssc-concepts.ndjson + +# DDC German +# TODO: Use jskos-data path if possible +jskos-server http://bartoc.org/en/node/241 /home/stefanp/ddc/ddc_23_de_2018-07-04_no-notes.ndjson + +# MSC 2010+2020 +jskos-server http://bartoc.org/en/node/20396 msc2020/msc2020-concepts_neu.ndjson + +# TODO: Can we handle MSC2020 via BARTOC as well? I think there was an issue with overlapping URIs. 
+jskos-server http://bartoc.org/en/node/474 msc2010/msc2010.ndjson +jskos-server msc2020/msc2020-scheme.json msc2020/msc2020-concepts.ndjson + +# GND - scheme data only +jskos-server http://bartoc.org/en/node/430 +# BK - scheme data only +jskos-server http://bartoc.org/en/node/18785 +# RVK - scheme data only +jskos-server http://bartoc.org/en/node/533 +# Wikidata - scheme data only +jskos-server http://bartoc.org/en/node/1940 +# STW - scheme data only +jskos-server http://bartoc.org/en/node/313 +# EuroVoc - scheme data only +jskos-server http://bartoc.org/en/node/15 +# Iconclass - scheme data only +jskos-server http://bartoc.org/en/node/459 +# LSCH - scheme data only +jskos-server http://bartoc.org/en/node/454 +# LCNAF - scheme data only +jskos-server http://bartoc.org/en/node/18536 + +# TODO: SDNB vs. DDC Sachgruppen. SDNB concepts have the latter as scheme URI. +jskos-server http://bartoc.org/en/node/20049 sdnb/sdnb-concepts.ndjson +jskos-server http://bartoc.org/en/node/18497 + +### RVK ### +jskos-server-rvk http://bartoc.org/en/node/533 rvk/2022_3/rvko_2022_3.ndjson + +### Dev ### + +# Import scheme entries for all coli-conc vocabularies (so that they are supported by mappings and concordances) +jskos-server-dev https://bartoc.org/api/voc?partOf=http%3A%2F%2Fbartoc.org%2Fen%2Fnode%2F18926&limit=500 diff --git a/src/data.ts b/src/data.ts index 03d0c4f..06a8cea 100755 --- a/src/data.ts +++ b/src/data.ts @@ -1,9 +1,16 @@ -#!/usr/bin/env -S deno run --allow-env --allow-read --allow-run --allow-sys --ext=ts --lock=${COLI_CONC_BASE}/src/deno.lock +#!/usr/bin/env -S deno run --allow-env --allow-read --allow-run --allow-net --allow-sys --ext=ts --lock=${COLI_CONC_BASE}/src/deno.lock /** * Script to manage data in jskos-server instances. 
*/ +/** + * TODOs: + * - Support .env files for determining baseUrl (currently not important) + * - Fix issue with reset requiring confirmation for every single entry + * - Add filtering + */ + Deno.env.set("FORCE_COLOR", "2") import { existsSync } from "https://deno.land/std/fs/mod.ts" import { parseArgs } from "https://deno.land/std@0.207.0/cli/parse_args.ts" @@ -18,12 +25,9 @@ const flags = parseArgs(Deno.args, { "reset": "r", "data": "d", }, - default: { - "data": "atasda", - }, }) -const [command, target, ...args]: string[] = flags._ +const [command, target]: string[] = flags._ const availableCommands = [ "import", @@ -35,6 +39,10 @@ import { parse as parseYaml } from "https://deno.land/std@0.207.0/yaml/mod.ts" import { getEnv } from "../src/utils.ts" const { servicePath, targetPath, uid, gid, basePath, dataPath, configsPath, secretsPath } = getEnv(target) +if (!flags.data) { + flags.data = `${configsPath}/vocabularies.txt` +} + // Set environment for `docker compose` calls import process from "node:process" process.env.UID = uid @@ -111,6 +119,9 @@ const targetService = availableTargets.find(t => t.name === target) if (targetService) { // ##### Run import/reset script for a particular instance of JSKOS Server ##### + // Forward all arguments after target + const [,, ...args] = Deno.args.slice(Deno.args.findIndex(arg => arg === target) - 1) + console.log(`Running ${command} script for ${targetService.name} (Docker service ${targetService.service}) with params:`) args.forEach(arg => console.log(` ${arg}`)) @@ -130,7 +141,7 @@ if (targetService) { await cd(targetPath) try { - await $`docker compose run ${runArgs} ${targetService.service} /usr/src/app/bin/${command}.js ${args}` + await $`docker compose run -it ${runArgs} ${targetService.service} /usr/src/app/bin/${command}.js ${args}` } catch (error) { console.error() console.error(`An error occurred during import attempt. Details should be in the output above. 
(exit code: ${error.exitCode})`) @@ -143,6 +154,112 @@ if (targetService) { Deno.exit(1) } else { // ##### Run import script that uses config/vocabularies.txt as data basis ##### - console.warn("Warning: data import (without service) is not yet implemented.") - Deno.exit(0) + const shouldProceed = confirm("data import (without target) is not fully implemented yet, particularly the filtering option. Continue anyway?") + if (!shouldProceed) { + Deno.exit(0) + } + + const jskosDataPath = `${dataPath}/jskos-data` + + // TODO: Reset script will currently always ask for confirmation. Using `yes` or `stdin.write` do not work. + // if (flags.reset && !flags.g) { + // const shouldProceed = confirm("Are you sure you want to reset all vocabularies? There will be no further confirmation.") + // if (!shouldProceed) { + // console.log("Exiting...") + // Deno.exit(0) + // } + // } + + const data = (await Deno.readTextFile(flags.data)) + .split("\n") + .map(line => line.trim()) + .filter(line => line && !line.startsWith("#")) + .map(line => { + const lineParts = line.split(/\s+/) + for (let i = 1; i < lineParts.length; i += 1) { + if (!lineParts[i].startsWith("/") && !lineParts[i].startsWith("http://") && !lineParts[i].startsWith("https://")) { + lineParts[i] = `${jskosDataPath}/${lineParts[i]}` + } + } + const [target, scheme, ...conceptPaths] = lineParts + return { + target, + scheme, + conceptPaths, + } + }) + + for (let { target, scheme, conceptPaths } of data) { + console.log(`==================== Importing ${scheme} into ${target} ====================`) + console.log("Target:", target) + console.log("Scheme URL/URI:", scheme) + if (conceptPaths.length) { + console.log("Concept paths/URLs:") + conceptPaths.forEach(path => console.log(`- ${path}`)) + } + console.log("Force:", flags.force) + console.log("Reset:", flags.reset) + console.log() + const targetService = availableTargets.find(t => t.name === target) + if (!targetService) { + console.error(`Error: Target with name 
\`${target}\` does not exist, skipping.\n`)
+      continue
+    }
+    let uri // only set for BARTOC scheme URIs; stays undefined for URLs and local scheme files
+    if (scheme.startsWith("http://bartoc.org")) {
+      uri = scheme
+      scheme = `https://bartoc.org/api/data?uri=${uri}`
+    }
+    if (flags.reset && uri) {
+      await $`data reset ${target} -s ${uri}`
+    }
+    await $`data import ${target} scheme ${scheme}`
+
+    // Check if concepts exist already; skipped when force/reset is set or the line lists no concept data
+    if (uri && !flags.force && !flags.reset && conceptPaths.length) {
+      const baseUrl = await getBaseUrlForTarget(target)
+      if (!baseUrl) {
+        console.warn(`Warning: Can't check whether concepts for ${uri} in ${target} exist. Importing anyway.`)
+      } else {
+        const response = await fetch(`${baseUrl}voc?uri=${encodeURIComponent(uri)}`) // baseUrl always ends with "/", so no extra slash here
+        const json = await response.json()
+        if (json?.[0]?.concepts?.length > 0) {
+          console.warn(`Concept data for ${uri} already exists. Run script with -f to import anyway.`)
+          continue
+        }
+      }
+    }
+
+    for (const path of conceptPaths) {
+      await $`data import ${target} concepts ${path}`
+    }
+
+    console.log()
+  }
+}
+
+async function getBaseUrlForTarget(target: string) { // resolves to the configured baseUrl (always with trailing "/") or null
+  // Candidate JSON config files are tried in order; the first one with a baseUrl wins. TODO: Support .env files as well
+  const possiblePaths = [
+    `${configsPath}/${target}.json`,
+    `${configsPath}/${target}/config.json`,
+    `${configsPath}/${target}/jskos-server.json`,
+  ]
+  for (const path of possiblePaths) {
+    try {
+      const config = JSON.parse(await Deno.readTextFile(path))
+      let baseUrl = config.baseUrl
+      if (!baseUrl) {
+        console.error(`getBaseUrlForTarget: Read config file at ${path}, but it does not contain baseUrl`)
+        continue
+      }
+      if (!baseUrl.endsWith("/")) { // normalize so callers can append a relative path directly
+        baseUrl += "/"
+      }
+      return baseUrl
+    } catch (_error) {
+      // Ignore: the file could not be read or parsed as JSON, so try the next candidate path
+    }
+  }
+  return null
 }