Skip to content

Commit

Permalink
Extend data script to manage vocabularies for all instances (#4)
Browse files Browse the repository at this point in the history
Filtering is still missing.
  • Loading branch information
stefandesu committed Feb 21, 2024
1 parent 643e327 commit 9e1bbdd
Show file tree
Hide file tree
Showing 2 changed files with 211 additions and 8 deletions.
86 changes: 86 additions & 0 deletions configs/vocabularies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# This file lists vocabularies and their data
# that should be imported into our jskos-server instances.

# Format:
# jskos-server-instance vocabulary-uri-or-path [concept-data-url-or-path ...]

# Example:
# jskos-server http://bartoc.org/en/node/1043 thema/thema-concepts-1.4.ndjson

### Main instance ###

# Import scheme entries for all coli-conc vocabularies (so that they are supported by mappings and concordances)
jskos-server https://bartoc.org/api/voc?partOf=http%3A%2F%2Fbartoc.org%2Fen%2Fnode%2F18926&limit=500

jskos-server http://bartoc.org/en/node/520 dfg/dfg-2020.concepts.ndjson
jskos-server http://bartoc.org/en/node/730 bos/bos-concepts.ndjson
jskos-server http://bartoc.org/en/node/742 oesoeb/oesoeb-concepts.ndjson
jskos-server http://bartoc.org/en/node/1339 htwg/htwg-concepts.ndjson
jskos-server http://bartoc.org/en/node/1042 fos/fos-concepts.ndjson
jskos-server http://bartoc.org/en/node/1043 thema/thema-concepts-1.4.ndjson
jskos-server http://bartoc.org/en/node/1050 skj/skj-concepts.ndjson
jskos-server http://bartoc.org/en/node/1094 oefos/oefos-concepts.ndjson
jskos-server http://bartoc.org/en/node/1232 nomenclature/nomenclature.concepts.ndjson
jskos-server http://bartoc.org/en/node/1324 seb/seb-concepts.ndjson
jskos-server http://bartoc.org/en/node/1822 nomisma/nomisma-concepts.ndjson
jskos-server http://bartoc.org/en/node/1986 mv/mv-concepts.ndjson
jskos-server http://bartoc.org/en/node/18797 ixtheo/ixtheo.ndjson
jskos-server http://bartoc.org/en/node/18915 zdb-fgs/zdb-fgs-concepts.ndjson
jskos-server http://bartoc.org/en/node/18920 hochschulfaechersystematik/faechersystematik.concepts.ndjson
jskos-server http://bartoc.org/en/node/18928 ssg/ssg-concepts.ndjson
jskos-server http://bartoc.org/en/node/20049 sdnb/sdnb-concepts.ndjson
jskos-server http://bartoc.org/en/node/20298 nsk/fachgruppen.ndjson nsk/sachschluessel.ndjson
jskos-server http://bartoc.org/en/node/20400 bc/bc-concepts.ndjson
jskos-server http://bartoc.org/en/node/20404 retrohab/retrohab-concepts.ndjson
jskos-server http://bartoc.org/en/node/20405 gessner/gessner-concepts.ndjson
jskos-server http://bartoc.org/en/node/20406 brunfels/brunfels-concepts.ndjson
jskos-server http://bartoc.org/en/node/20407 thuana/thuana-concepts.ndjson
jskos-server http://bartoc.org/en/node/20430 obv/obv-concepts.ndjson
jskos-server http://bartoc.org/en/node/20050 fkdigbib/fkdigbib-concepts.ndjson
jskos-server http://bartoc.org/en/node/1051 ssd/ssd-concepts.ndjson
jskos-server http://bartoc.org/en/node/743 essb/essb-concepts.ndjson
jskos-server http://bartoc.org/en/node/20446 fivs/fivs-concepts.ndjson
jskos-server http://bartoc.org/en/node/20447 fivr/fivr-concepts.ndjson
jskos-server http://bartoc.org/en/node/220 bssc/bssc-concepts.ndjson

# DDC German
# TODO: Use jskos-data path if possible
jskos-server http://bartoc.org/en/node/241 /home/stefanp/ddc/ddc_23_de_2018-07-04_no-notes.ndjson

# MSC 2010+2020
jskos-server http://bartoc.org/en/node/20396 msc2020/msc2020-concepts_neu.ndjson

# TODO: Can we handle MSC2020 via BARTOC as well? I think there was an issue with overlapping URIs.
jskos-server http://bartoc.org/en/node/474 msc2010/msc2010.ndjson
jskos-server msc2020/msc2020-scheme.json msc2020/msc2020-concepts.ndjson

# GND - scheme data only
jskos-server http://bartoc.org/en/node/430
# BK - scheme data only
jskos-server http://bartoc.org/en/node/18785
# RVK - scheme data only
jskos-server http://bartoc.org/en/node/533
# Wikidata - scheme data only
jskos-server http://bartoc.org/en/node/1940
# STW - scheme data only
jskos-server http://bartoc.org/en/node/313
# EuroVoc - scheme data only
jskos-server http://bartoc.org/en/node/15
# Iconclass - scheme data only
jskos-server http://bartoc.org/en/node/459
# LSCH - scheme data only
jskos-server http://bartoc.org/en/node/454
# LCNAF - scheme data only
jskos-server http://bartoc.org/en/node/18536

# TODO: SDNB vs. DDC Sachgruppen. SDNB concepts have the latter as scheme URI.
jskos-server http://bartoc.org/en/node/20049 sdnb/sdnb-concepts.ndjson
jskos-server http://bartoc.org/en/node/18497

### RVK ###
jskos-server-rvk http://bartoc.org/en/node/533 rvk/2022_3/rvko_2022_3.ndjson

### Dev ###

# Import scheme entries for all coli-conc vocabularies (so that they are supported by mappings and concordances)
jskos-server-dev https://bartoc.org/api/voc?partOf=http%3A%2F%2Fbartoc.org%2Fen%2Fnode%2F18926&limit=500
133 changes: 125 additions & 8 deletions src/data.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
#!/usr/bin/env -S deno run --allow-env --allow-read --allow-run --allow-sys --ext=ts --lock=${COLI_CONC_BASE}/src/deno.lock
#!/usr/bin/env -S deno run --allow-env --allow-read --allow-run --allow-net --allow-sys --ext=ts --lock=${COLI_CONC_BASE}/src/deno.lock

/**
* Script to manage data in jskos-server instances.
*/

/**
* TODOs:
* - Support .env files for determining baseUrl (currently not important)
* - Fix issue with reset requiring confirmation for every single entry
* - Add filtering
*/

Deno.env.set("FORCE_COLOR", "2")
import { existsSync } from "https://deno.land/std/fs/mod.ts"
import { parseArgs } from "https://deno.land/[email protected]/cli/parse_args.ts"
Expand All @@ -18,12 +25,9 @@ const flags = parseArgs(Deno.args, {
"reset": "r",
"data": "d",
},
default: {
"data": "atasda",
},
})

const [command, target, ...args]: string[] = flags._
const [command, target]: string[] = flags._

const availableCommands = [
"import",
Expand All @@ -35,6 +39,10 @@ import { parse as parseYaml } from "https://deno.land/[email protected]/yaml/mod.ts"
import { getEnv } from "../src/utils.ts"
const { servicePath, targetPath, uid, gid, basePath, dataPath, configsPath, secretsPath } = getEnv(target)

if (!flags.data) {
flags.data = `${configsPath}/vocabularies.txt`
}

// Set environment for `docker compose` calls
import process from "node:process"
process.env.UID = uid
Expand Down Expand Up @@ -111,6 +119,9 @@ const targetService = availableTargets.find(t => t.name === target)

if (targetService) {
// ##### Run import/reset script for a particular instance of JSKOS Server #####
// Forward all arguments after target
const [,, ...args] = Deno.args.slice(Deno.args.findIndex(arg => arg === target) - 1)

console.log(`Running ${command} script for ${targetService.name} (Docker service ${targetService.service}) with params:`)
args.forEach(arg => console.log(` ${arg}`))

Expand All @@ -130,7 +141,7 @@ if (targetService) {

await cd(targetPath)
try {
await $`docker compose run ${runArgs} ${targetService.service} /usr/src/app/bin/${command}.js ${args}`
await $`docker compose run -it ${runArgs} ${targetService.service} /usr/src/app/bin/${command}.js ${args}`
} catch (error) {
console.error()
console.error(`An error occurred during import attempt. Details should be in the output above. (exit code: ${error.exitCode})`)
Expand All @@ -143,6 +154,112 @@ if (targetService) {
Deno.exit(1)
} else {
// ##### Run import script that uses configs/vocabularies.txt as data basis #####
console.warn("Warning: data import (without service) is not yet implemented.")
Deno.exit(0)
const shouldProceed = confirm("data import (without target) is not fully implemented yet, particularly the filtering option. Continue anyway?")
if (!shouldProceed) {
Deno.exit(0)
}

const jskosDataPath = `${dataPath}/jskos-data`

// TODO: Reset script will currently always ask for confirmation. Neither `yes` nor `stdin.write` works.
// if (flags.reset && !flags.g) {
// const shouldProceed = confirm("Are you sure you want to reset all vocabularies? There will be no further confirmation.")
// if (!shouldProceed) {
// console.log("Exiting...")
// Deno.exit(0)
// }
// }

const data = (await Deno.readTextFile(flags.data))
.split("\n")
.map(line => line.trim())
.filter(line => line && !line.startsWith("#"))
.map(line => {
const lineParts = line.split(/\s+/)
for (let i = 1; i < lineParts.length; i += 1) {
if (!lineParts[i].startsWith("/") && !lineParts[i].startsWith("http://") && !lineParts[i].startsWith("https://")) {
lineParts[i] = `${jskosDataPath}/${lineParts[i]}`
}
}
const [target, scheme, ...conceptPaths] = lineParts
return {
target,
scheme,
conceptPaths,
}
})

for (let { target, scheme, conceptPaths } of data) {
console.log(`==================== Importing ${scheme} into ${target} ====================`)
console.log("Target:", target)
console.log("Scheme URL/URI:", scheme)
if (conceptPaths.length) {
console.log("Concept paths/URLs:")
conceptPaths.forEach(path => console.log(`- ${path}`))
}
console.log("Force:", flags.force)
console.log("Reset:", flags.reset)
console.log()
const targetService = availableTargets.find(t => t.name === target)
if (!targetService) {
console.error(`Error: Target with name \`${target}\` does not exist, skipping.\n`)
continue
}
let uri
if (scheme.startsWith("http://bartoc.org")) {
uri = scheme
scheme = `https://bartoc.org/api/data?uri=${uri}`
}
if (flags.reset && uri) {
await $`data reset ${target} -s ${uri}`
}
await $`data import ${target} scheme ${scheme}`

// Check if concepts exist already
if (uri && !flags.force && !flags.reset && conceptPaths.length) {
const baseUrl = await getBaseUrlForTarget(target)
if (!baseUrl) {
console.warn(`Warning: Can't check whether concepts for ${uri} in ${target} exist. Importing anyway.`)
} else {
const response = await fetch(`${baseUrl}/voc?uri=${encodeURIComponent(uri)}`)
const json = await response.json()
if (json?.[0]?.concepts?.length > 0) {
console.warn(`Concept data for ${uri} already exists. Run script with -f to import anyway.`)
continue
}
}
}

for (const path of conceptPaths) {
await $`data import ${target} concepts ${path}`
}

console.log()
}
}

/**
 * Looks up the configured base URL for a target by reading its JSON config file.
 *
 * A fixed list of candidate config paths below `configsPath` is tried in order;
 * the `baseUrl` of the first candidate that provides a non-empty string wins.
 *
 * @param target - Name of the target; used to build the candidate config paths.
 * @returns The base URL, always ending with "/" (so callers should append paths
 *   without a leading slash), or `null` if no candidate file provides one.
 */
async function getBaseUrlForTarget(target: string): Promise<string | null> {
  // TODO: Support .env files as well
  const possiblePaths = [
    `${configsPath}/${target}.json`,
    `${configsPath}/${target}/config.json`,
    `${configsPath}/${target}/jskos-server.json`,
  ]
  for (const path of possiblePaths) {
    let config: unknown
    try {
      config = JSON.parse(await Deno.readTextFile(path))
    } catch (error) {
      // A missing candidate file is expected and skipped silently. Anything else
      // (malformed JSON, missing permissions, ...) is reported so that a broken
      // config is not mistaken for an absent one. The lookup stays best-effort.
      if (!(error instanceof Deno.errors.NotFound)) {
        const reason = error instanceof Error ? error.message : String(error)
        console.error(`getBaseUrlForTarget: Could not read config file at ${path}: ${reason}`)
      }
      continue
    }
    // The parsed JSON is untrusted; only accept a non-empty string as baseUrl
    const baseUrl = (config as { baseUrl?: unknown } | null)?.baseUrl
    if (typeof baseUrl !== "string" || !baseUrl) {
      console.error(`getBaseUrlForTarget: Read config file at ${path}, but it does not contain baseUrl`)
      continue
    }
    // Normalize so the result always ends with exactly the slash the original added
    return baseUrl.endsWith("/") ? baseUrl : `${baseUrl}/`
  }
  return null
}

0 comments on commit 9e1bbdd

Please sign in to comment.