diff --git a/housekeeping/build.gradle b/housekeeping/build.gradle new file mode 100755 index 0000000000..9239c707df --- /dev/null +++ b/housekeeping/build.gradle @@ -0,0 +1,61 @@ +apply plugin: 'war' +apply plugin: 'groovy' +apply from: '../gretty.plugin' + +repositories { + mavenCentral() +} + +// Don't blame me for this TRAVESTY. It is a necessity because of the versioning of xml-apis (2.0.2 which gradle otherwise chooses is OLDER (and broken) despite the version.) +configurations.all { + resolutionStrategy { + force "xml-apis:xml-apis:1.4.01" + } +} + +sourceSets { + main { + java { srcDirs = [] } + groovy { srcDirs = ['src/main/java', 'src/main/groovy'] } + } + test { + groovy { srcDir 'src/test/groovy/' } + } +} + + +dependencies { + // XL dependencies + implementation(project(':whelk-core')) + + // Logging + implementation group: 'org.apache.logging.log4j', name: 'log4j-api', version: "${log4jVersion}" + implementation group: 'org.apache.logging.log4j', name: 'log4j-core', version: "${log4jVersion}" + + // metrics + implementation "io.prometheus:simpleclient:${prometheusVersion}" + implementation "io.prometheus:simpleclient_servlet:${prometheusVersion}" + + //implementation 'org.apache.commons:commons-lang3:3.3.2' + implementation "org.codehaus.groovy:groovy:${groovyVersion}" + + // Email + implementation group: 'org.simplejavamail', name: 'simple-java-mail', version: '8.2.0' + + // Cron + implementation group: 'it.sauronsoftware.cron4j', name: 'cron4j', version: '2.2.5' + +} + +gretty { + jvmArgs = ['-XX:+UseParallelGC'] + systemProperties = ['xl.secret.properties': System.getProperty("xl.secret.properties")] + httpPort = 8589 + scanInterval = 0 + afterEvaluate { + appRunDebug { + debugPort = 5006 + debugSuspend = false + } + } +} diff --git a/housekeeping/src/main/groovy/whelk/housekeeping/NotificationGenerator.groovy b/housekeeping/src/main/groovy/whelk/housekeeping/NotificationGenerator.groovy new file mode 100644 index 0000000000..8e55eb155f --- /dev/null +++ b/housekeeping/src/main/groovy/whelk/housekeeping/NotificationGenerator.groovy @@ -0,0 +1,300 @@ +package whelk.housekeeping + +import whelk.Document +import whelk.IdGenerator +import whelk.JsonLd +import whelk.Whelk +import groovy.transform.CompileStatic +import groovy.util.logging.Log4j2 as Log + +import java.sql.Array +import java.sql.Connection +import java.sql.PreparedStatement +import java.sql.ResultSet +import java.sql.Timestamp +import java.time.Instant +import java.time.ZoneOffset +import java.time.ZonedDateTime +import java.time.format.DateTimeFormatter +import java.time.temporal.ChronoUnit + +import static whelk.util.Jackson.mapper + +@CompileStatic +@Log +class NotificationGenerator extends HouseKeeper { + + public static final String STATE_KEY = "CXZ notification generator" + private static final int MAX_OBSERVATIONS_PER_CHANGE = 20 + private String status = "OK" + private final Whelk whelk + + public NotificationGenerator(Whelk whelk) { + this.whelk = whelk + } + + public String getName() { + return "Notifications generator" + } + + public String getStatusDescription() { + return status + } + + public String getCronSchedule() { + return "* * * * *" + } + + public void trigger() { + // Determine the time interval of changes for which to generate notifications. + Instant now = Instant.now() + Timestamp from = Timestamp.from(now) // First run? Default to now (=do nothing but set the timestamp for next run) + Map state = whelk.getStorage().getState(STATE_KEY) + if (state && state.lastGenerationTime) + from = Timestamp.from( ZonedDateTime.parse( (String) state.lastGenerationTime, DateTimeFormatter.ISO_OFFSET_DATE_TIME).toInstant() ) + Timestamp until = Timestamp.from(now) + + Connection connection + PreparedStatement statement + ResultSet resultSet + + Map> changedInstanceIDsWithComments = [:] + + connection = whelk.getStorage().getOuterConnection() + connection.setAutoCommit(false) + try { + // Fetch all changed IDs within the interval + String sql = "SELECT id, ARRAY_AGG(data#>>'{@graph,0,hasChangeNote}') as changeNotes FROM lddb__versions WHERE collection IN ('bib', 'auth') AND ( modified > ? AND modified <= ? ) group by id;" + connection.setAutoCommit(false) + statement = connection.prepareStatement(sql) + statement.setTimestamp(1, from) + statement.setTimestamp(2, until) + statement.setFetchSize(512) + resultSet = statement.executeQuery() + while (resultSet.next()) { + String id = resultSet.getString("id") + + Array changeNotesArray = resultSet.getArray("changeNotes") + List changeNotes = [] + for (Object o : changeNotesArray.getArray()) { + if (o != null) + changeNotes.add(o) + } + + List> dependers = whelk.getStorage().followDependers(id, ["itemOf"]) + dependers.add(new Tuple2(id, null)) // This ID too, not _only_ the dependers! + dependers.each { + String dependerID = it[0] + String dependerMainEntityType = whelk.getStorage().getMainEntityTypeBySystemID(dependerID) + if (whelk.getJsonld().isSubClassOf(dependerMainEntityType, "Instance")) { + if (!changedInstanceIDsWithComments.containsKey(dependerID)) { + changedInstanceIDsWithComments.put(dependerID, []) + } + changedInstanceIDsWithComments[dependerID].addAll(changeNotes) + } + } + } + + for (String instanceId : changedInstanceIDsWithComments.keySet()) { + List resultingChangeObservations = generateObservationsForAffectedInstance( + instanceId, changedInstanceIDsWithComments[instanceId], from.toInstant(), until.toInstant()) + + if (resultingChangeObservations.size() <= MAX_OBSERVATIONS_PER_CHANGE) { + for (Document observation : resultingChangeObservations) { + if (!whelk.createDocument(observation, "NotificationGenerator", "SEK", "none", false)) { + log.error("Failed to create ChangeObservation:\n${observation.getDataAsString()}") + } + } + } + } + + } catch (Throwable e) { + status = "Failed with:\n" + e + "\nat:\n" + e.getStackTrace().toString() + throw e + } finally { + connection.close() + Map newState = new HashMap() + newState.lastGenerationTime = until.toInstant().atOffset(ZoneOffset.UTC).toString() + whelk.getStorage().putState(STATE_KEY, newState) + } + } + + private List generateObservationsForAffectedInstance(String instanceId, List changeNotes, Instant before, Instant after) { + List generatedObservations = [] + List propertiesToEmbellish = [ + "mainEntity", + "instanceOf", + "contribution", + "hasTitle", + "intendedAudience", + "classification", + "precededBy", + "succeededBy", + "contribution", + "agent", + ] + Document instanceAfterChange = whelk.getStorage().loadAsOf(instanceId, Timestamp.from(after)) + historicEmbellish(instanceAfterChange, propertiesToEmbellish, after) + Document instanceBeforeChange = whelk.getStorage().loadAsOf(instanceId, Timestamp.from(before)) + if (instanceBeforeChange == null) { // This instance is new, and did not exist at 'before'. + return generatedObservations + } + historicEmbellish(instanceBeforeChange, propertiesToEmbellish, before) + + Tuple comparisonResult = primaryContributionChanged(instanceBeforeChange, instanceAfterChange) + if (comparisonResult[0]) { + generatedObservations.add( + makeChangeObservation( + instanceId, changeNotes, "https://id.kb.se/changecategory/primarycontribution", + (Map) comparisonResult[1], (Map) comparisonResult[2]) + ) + } + + return generatedObservations + } + + private static Tuple primaryContributionChanged(Document instanceBeforeChange, Document instanceAfterChange) { + Object contributionsAfter = Document._get(["mainEntity", "instanceOf", "contribution"], instanceAfterChange.data) + Object contributionsBefore = Document._get(["mainEntity", "instanceOf", "contribution"], instanceBeforeChange.data) + if (contributionsBefore != null && contributionsAfter != null && contributionsBefore instanceof List && contributionsAfter instanceof List) { + for (Object contrBefore : contributionsBefore) { + for (Object contrAfter : contributionsAfter) { + if (contrBefore["@type"].equals("PrimaryContribution") && contrAfter["@type"].equals("PrimaryContribution")) { + if (contrBefore["agent"] != null && contrAfter["agent"] != null) { + if ( + contrBefore["agent"]["familyName"] != contrAfter["agent"]["familyName"] || + contrBefore["agent"]["givenName"] != contrAfter["agent"]["givenName"] || + contrBefore["agent"]["lifeSpan"] != contrAfter["agent"]["lifeSpan"] + ) + return new Tuple(true, contrBefore["agent"], contrAfter["agent"]) + } + } + } + } + } + return new Tuple(false, null, null) + } + + private Document makeChangeObservation(String instanceId, List changeNotes, String categoryUri, Map oldValue, Map newValue) { + String newId = IdGenerator.generate() + String metadataUri = Document.BASE_URI.toString() + newId + String mainEntityUri = metadataUri+"#it" + + // If the @id is left, the object is considered a link, and the actual data (which we want) is removed when storing this as a record. + Map oldValueEmbedded = new HashMap(oldValue) + oldValueEmbedded.remove("@id") + Map newValueEmbedded = new HashMap(newValue) + newValueEmbedded.remove("@id") + + Map observationData = [ "@graph":[ + [ + "@id" : metadataUri, + "@type" : "Record", + "mainEntity" : ["@id" : mainEntityUri], + ], + [ + "@id" : mainEntityUri, + "@type" : "ChangeObservation", + "concerning" : ["@id" : Document.BASE_URI.toString() + instanceId], + "representationBefore" : oldValueEmbedded, + "representationAfter" : newValueEmbedded, + "category" : ["@id" : categoryUri], + ] + ]] + + List comments = extractComments(changeNotes) + if (comments) { + Map mainEntity = (Map) observationData["@graph"][1] + mainEntity.put("comment", comments) + } + + return new Document(observationData) + } + + private List extractComments(List changeNotes) { + List comments = [] + for (Object changeNote : changeNotes) { + if ( ! (changeNote instanceof String) ) + continue + Map changeNoteMap = mapper.readValue( (String) changeNote, Map) + comments.addAll( asList(changeNoteMap["comment"]) ) + } + return comments + } + + private List asList(Object o) { + if (o == null) + return [] + if (o instanceof List) + return o + return [o] + } + + /** + * This is a simplified/specialized from of 'embellish', for historic data and using only select properties. + * The full general embellish code can not help us here, because it is based on the idea of cached cards, + * which can (and must!) only cache the latest/current data for each card, which isn't what we need here + * (we need to embellish older historic data). + * + * This function mutates docToEmbellish + */ + private void historicEmbellish(Document docToEmbellish, List properties, Instant asOf) { + List graphListToEmbellish = (List) docToEmbellish.data["@graph"] + Set alreadyLoadedURIs = [] + + for (int i = 0; i < properties.size(); ++i) { + Set uris = findLinkedURIs(graphListToEmbellish, properties) + uris.removeAll(alreadyLoadedURIs) + if (uris.isEmpty()) + break + + Map linkedDocumentsByUri = whelk.bulkLoad(uris, asOf) + linkedDocumentsByUri.each { + List linkedGraphList = (List) it.value.data["@graph"] + if (linkedGraphList.size() > 1) + graphListToEmbellish.add(linkedGraphList[1]) + } + alreadyLoadedURIs.addAll(uris) + } + + docToEmbellish.data = JsonLd.frame(docToEmbellish.getCompleteId(), docToEmbellish.data) + } + + private Set findLinkedURIs(Object node, List properties) { + Set uris = [] + if (node instanceof List) { + for (Object element : node) { + uris.addAll(findLinkedURIs(element, properties)) + } + } + else if (node instanceof Map) { + for (String key : node.keySet()) { + if (properties.contains(key)) { + uris.addAll(getLinkIfAny(node[key])) + } + uris.addAll(findLinkedURIs(node[key], properties)) + } + } + return uris + } + + private List getLinkIfAny(Object node) { + List uris = [] + if (node instanceof Map) { + if (node.containsKey("@id")) { + uris.add((String) node["@id"]) + } + } + if (node instanceof List) { + for (Object element : node) { + if (element instanceof Map) { + if (element.containsKey("@id")) { + uris.add((String) element["@id"]) + } + } + } + } + return uris + } + +} diff --git a/housekeeping/src/main/groovy/whelk/housekeeping/NotificationSender.groovy b/housekeeping/src/main/groovy/whelk/housekeeping/NotificationSender.groovy new file mode 100644 index 0000000000..52d1838056 --- /dev/null +++ b/housekeeping/src/main/groovy/whelk/housekeeping/NotificationSender.groovy @@ -0,0 +1,258 @@ +package whelk.housekeeping + +import groovy.transform.CompileStatic +import groovy.util.logging.Log4j2 +import org.simplejavamail.api.email.Email +import org.simplejavamail.api.mailer.Mailer +import org.simplejavamail.email.EmailBuilder +import org.simplejavamail.mailer.MailerBuilder +import whelk.Document +import whelk.JsonLd +import whelk.Whelk +import whelk.util.PropertyLoader + +import java.sql.Array +import java.sql.Connection +import java.sql.PreparedStatement +import java.sql.ResultSet +import java.sql.Timestamp +import java.time.Instant +import java.time.ZoneOffset +import java.time.ZonedDateTime +import java.time.format.DateTimeFormatter +import java.time.temporal.ChronoUnit + +import static whelk.util.Jackson.mapper + +@CompileStatic +@Log4j2 +class NotificationSender extends HouseKeeper { + + private final String STATE_KEY = "CXZ notification email sender" + private String status = "OK" + private final Whelk whelk + private final Mailer mailer + private final String senderAddress + + public NotificationSender(Whelk whelk) { + this.whelk = whelk + Properties props = PropertyLoader.loadProperties("secret") + if (props.containsKey("smtpServer") && + props.containsKey("smtpPort") && + props.containsKey("smtpSender") && + props.containsKey("smtpUser") && + props.containsKey("smtpPassword")) + mailer = MailerBuilder + .withSMTPServer( + (String) props.get("smtpServer"), + Integer.parseInt((String)props.get("smtpPort")), + (String) props.get("smtpUser"), + (String) props.get("smtpPassword") + ).buildMailer() + senderAddress = props.get("smtpSender") + } + + @Override + String getName() { + return "Notifications sender" + } + + @Override + String getStatusDescription() { + return status + } + + public String getCronSchedule() { + return "* * * * *" + } + + @Override + void trigger() { + // Build a multi-map of library -> list of settings objects for that library's users + Map> heldByToUserSettings = new HashMap<>(); + { + List allUserSettingStrings = whelk.getStorage().getAllUserData() + for (Map settings : allUserSettingStrings) { + if (!settings["notificationEmail"]) + continue + settings?.requestedNotifications?.each { request -> + if (!request instanceof Map) + return + if (!request["heldBy"]) + return + + String heldBy = request["heldBy"] + if (!heldByToUserSettings.containsKey(heldBy)) + heldByToUserSettings.put(heldBy, []) + heldByToUserSettings[heldBy].add(settings) + } + } + } + + // Determine the time interval of ChangeObservations to consider + Timestamp from = Timestamp.from(Instant.now().minus(1, ChronoUnit.DAYS)) // Default to last 24h if first time. + Map sendState = whelk.getStorage().getState(STATE_KEY) + if (sendState && sendState.notifiedChangesUpTo) + from = Timestamp.from( ZonedDateTime.parse( (String) sendState.notifiedChangesUpTo, DateTimeFormatter.ISO_OFFSET_DATE_TIME).toInstant() ) + + Connection connection + PreparedStatement statement + ResultSet resultSet + + Instant notifiedChangesUpTo = from.toInstant() + + connection = whelk.getStorage().getOuterConnection() + connection.setAutoCommit(false) + try { + String sql = "SELECT MAX(created) as lastChange, data#>>'{@graph,1,concerning,@id}' as instanceUri, ARRAY_AGG(data::text) as data FROM lddb WHERE data#>>'{@graph,1,@type}' = 'ChangeObservation' AND created > ? GROUP BY data#>>'{@graph,1,concerning,@id}';" + connection.setAutoCommit(false) + statement = connection.prepareStatement(sql) + statement.setTimestamp(1, from) + //System.err.println(" ** Searching for Observations: " + statement) + statement.setFetchSize(512) + resultSet = statement.executeQuery() + + while (resultSet.next()) { + String instanceUri = resultSet.getString("instanceUri") + Array changeObservationsArray = resultSet.getArray("data") + // Groovy.. + List changeObservationsForInstance = [] + for (Object o : changeObservationsArray.getArray()) { + changeObservationsForInstance.add(o) + } + + sendFor(instanceUri, heldByToUserSettings, changeObservationsForInstance) + + Instant lastChangeObservationForInstance = resultSet.getTimestamp("lastChange").toInstant() + if (lastChangeObservationForInstance.isAfter(notifiedChangesUpTo)) + notifiedChangesUpTo = lastChangeObservationForInstance + } + } catch (Throwable e) { + status = "Failed with:\n" + e + "\nat:\n" + e.getStackTrace().toString() + throw e + } finally { + connection.close() + if (notifiedChangesUpTo.isAfter(from.toInstant())) { + Map newState = new HashMap() + newState.notifiedChangesUpTo = notifiedChangesUpTo.atOffset(ZoneOffset.UTC).toString() + whelk.getStorage().putState(STATE_KEY, newState) + } + } + + } + + private void sendFor(String instanceUri, Map> heldByToUserSettings, List changeObservationsForInstance) { + String instanceId = whelk.getStorage().getSystemIdByIri(instanceUri) + List libraries = whelk.getStorage().getAllLibrariesHolding(instanceId) + + for (String library : libraries) { + List users = (List) heldByToUserSettings[library] + if (users) { + for (Map user : users) { + /* + 'user' is now a map looking something like this: + { + "id": "sldknfslkdnsdlkgnsdkjgnb" + "requestedNotifications": [ + { + "heldBy": "https://libris.kb.se/library/Utb1", + "triggers": [ + "https://id.kb.se/changecategory/primarycontribution" + ] + } + ], + "notificationEmail": "noreply@kb.se" + }*/ + + List matchedObservations = [] + + user?.requestedNotifications?.each { Map request -> + request?.triggers?.each { String trigger -> + Map triggeredObservation = matches(trigger, changeObservationsForInstance) + if (triggeredObservation != null) { + matchedObservations.add(triggeredObservation) + } + } + } + + if (!matchedObservations.isEmpty() && user.notificationEmail && user.notificationEmail instanceof String) { + String body = generateEmailBody(instanceId, matchedObservations) + sendEmail(senderAddress, (String) user.notificationEmail, "CXZ", body) + + System.err.println("Now send email to " + user.notificationEmail + "\n\t" + body) + } + } + } + } + + } + + private Map matches(String trigger, List changeObservationsForInstance) { + for (Object obj : changeObservationsForInstance) { + Map changeObservationMap = mapper.readValue( (String) obj, Map ) + List graphList = changeObservationMap["@graph"] + Map mainEntity = graphList?[1] + String category = mainEntity?.category["@id"] + if (category && category == trigger) + return changeObservationMap + } + return null + } + + private void sendEmail(String sender, String recipient, String subject, String body) { + if (mailer) { + Email email = EmailBuilder.startingBlank() + .to(recipient) + .withSubject(subject) + .from(sender) + .withPlainText(body) + .buildEmail() + + log.info("Sending notification (cxz) email to " + recipient) + mailer.sendMail(email) + } else { + log.info("Should now have sent notification (cxz) email to " + recipient + " but SMTP is not configured.") + } + } + + private String generateEmailBody(String changedInstanceId, List triggeredObservations) { + + Document current = whelk.getStorage().load(changedInstanceId) + String mainTitle = Document._get(["@graph", 1, "hasTitle", 0, "mainTitle"], current.data) + + StringBuilder sb = new StringBuilder() + sb.append("Ändringar har skett i instans: " + Document.BASE_URI.resolve(changedInstanceId).toString()) + if (mainTitle) + sb.append(" (" + mainTitle + ")\n") + else + sb.append("\n") + + for (Map observation : triggeredObservations) { + String observationUri = Document._get(["@graph", 1, "@id"], observation) + if (!observationUri) + continue + + String observationId = whelk.getStorage().getSystemIdByIri(observationUri) + Document embellishedObservation = whelk.loadEmbellished(observationId) + Map framed = JsonLd.frame(observationUri, embellishedObservation.data) + + Map category = whelk.getJsonld().applyLensAsMapByLang( (Map) framed["category"], ["sv"] as Set, [], ["chips"]) + Map before = whelk.getJsonld().applyLensAsMapByLang( (Map) framed["representationBefore"], ["sv"] as Set, [], ["chips"]) + Map after = whelk.getJsonld().applyLensAsMapByLang( (Map) framed["representationAfter"], ["sv"] as Set, [], ["chips"]) + sb.append("\tÄndring avser kategorin: "+ category["sv"]) + sb.append("\n\t\tInnan ändring: " + before["sv"]) + sb.append("\n\t\tEfter ändring: " + after["sv"]) + + Object comments = Document._get(["@graph", 1, "comment"], observation) + + if (comments instanceof List) { + sb.append("\n\t\tTillhörande kommentarer:") + for (String comment : comments) + sb.append("\n\t\t\t"+comment) + } + sb.append("\n\n") + } + + return sb.toString() + } +} diff --git a/housekeeping/src/main/groovy/whelk/housekeeping/WebInterface.groovy b/housekeeping/src/main/groovy/whelk/housekeeping/WebInterface.groovy new file mode 100755 index 0000000000..11736e513e --- /dev/null +++ b/housekeeping/src/main/groovy/whelk/housekeeping/WebInterface.groovy @@ -0,0 +1,92 @@ +package whelk.housekeeping + +import whelk.Whelk; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import groovy.transform.CompileStatic +import groovy.util.logging.Log4j2 as Log +import java.time.ZonedDateTime +import it.sauronsoftware.cron4j.Scheduler + +@CompileStatic +@Log +public abstract class HouseKeeper { + public abstract String getName() + public abstract String getStatusDescription() + public abstract String getCronSchedule() + public abstract void trigger() + + public ZonedDateTime lastFailAt = null + public ZonedDateTime lastRunAt = null + + synchronized void _trigger() { + try { + trigger() + lastRunAt = ZonedDateTime.now() + } catch (Throwable e) { + log.error("Could not handle throwable in Housekeeper TimerTask.", e) + lastFailAt = ZonedDateTime.now() + } + } +} + +@CompileStatic +@Log +public class WebInterface extends HttpServlet { + private Map houseKeepersById = [:] + Scheduler cronScheduler = new Scheduler() + + public void init() { + Whelk whelk = Whelk.createLoadedSearchWhelk() + + List houseKeepers = [ + new NotificationGenerator(whelk), + new NotificationSender(whelk) + ] + + houseKeepers.each { hk -> + String id = cronScheduler.schedule(hk.getCronSchedule(), { + hk._trigger() + }) + houseKeepersById.put(id, hk) + } + cronScheduler.start() + } + + public void destroy() { + cronScheduler.stop() + } + + public void doGet(HttpServletRequest req, HttpServletResponse res) { + StringBuilder sb = new StringBuilder() + sb.append("Active housekeepers: " + houseKeepersById.size() + "\n") + sb.append("--------------\n") + for (String key : houseKeepersById.keySet()) { + HouseKeeper hk = houseKeepersById[key] + sb.append(hk.getName() + "\n") + if (hk.lastRunAt) + sb.append("Last run at: " + hk.lastRunAt + "\n") + else + sb.append("Has never run\n") + if (hk.lastFailAt) + sb.append("Last failed at: " + hk.lastFailAt + "\n") + else + sb.append("No failures\n") + sb.append("Status:\n") + sb.append(hk.statusDescription+"\n") + sb.append("Execution schedule:\n") + sb.append(hk.cronSchedule+"\n") + sb.append("To force immediate execution, POST to:\n" + req.getRequestURL() + key + "\n") + sb.append("--------------\n") + } + res.setStatus(HttpServletResponse.SC_OK) + res.setContentType("text/plain") + res.getOutputStream().print(sb.toString()) + } + + public void doPost(HttpServletRequest req, HttpServletResponse res) { + String key = req.getRequestURI().split("/").last() + houseKeepersById[key]._trigger() + } +} diff --git a/housekeeping/src/main/resources/log4j2.xml b/housekeeping/src/main/resources/log4j2.xml new file mode 100644 index 0000000000..5819dcd87b --- /dev/null +++ b/housekeeping/src/main/resources/log4j2.xml @@ -0,0 +1,22 @@ + + + + . + + + + + + + + + + + + + + + + + + diff --git a/housekeeping/src/main/webapp/WEB-INF/web.xml b/housekeeping/src/main/webapp/WEB-INF/web.xml new file mode 100755 index 0000000000..669dcf68ce --- /dev/null +++ b/housekeeping/src/main/webapp/WEB-INF/web.xml @@ -0,0 +1,21 @@ + + + + Housekeeping + + + HousekeepingServlet + whelk.housekeeping.WebInterface + 1 + + + + HousekeepingServlet + / + + + diff --git a/librisxl-tools/postgresql/migrations/00000021-add-state-table.plsql b/librisxl-tools/postgresql/migrations/00000021-add-state-table.plsql new file mode 100644 index 0000000000..9e90233308 --- /dev/null +++ b/librisxl-tools/postgresql/migrations/00000021-add-state-table.plsql @@ -0,0 +1,33 @@ +BEGIN; + +DO $$DECLARE + -- THESE MUST BE CHANGED WHEN YOU COPY THE SCRIPT! + + -- The version you expect the database to have _before_ the migration + old_version numeric := 20; + -- The version the database should have _after_ the migration + new_version numeric := 21; + + -- hands off + existing_version numeric; + +BEGIN + + -- Check existing version +SELECT version from lddb__schema INTO existing_version; +IF ( existing_version <> old_version) THEN + RAISE EXCEPTION 'ASKED TO MIGRATE FROM INCORRECT EXISTING VERSION!'; +ROLLBACK; +END IF; +UPDATE lddb__schema SET version = new_version; + +-- ACTUAL SCHEMA CHANGES HERE: + +CREATE TABLE IF NOT EXISTS lddb__state ( + key text PRIMARY KEY, + value jsonb NOT NULL +); + +END$$; + +COMMIT; diff --git a/rest/src/main/groovy/whelk/rest/api/UserDataAPI.groovy b/rest/src/main/groovy/whelk/rest/api/UserDataAPI.groovy index f14181b754..7dd0c0165a 100644 --- a/rest/src/main/groovy/whelk/rest/api/UserDataAPI.groovy +++ b/rest/src/main/groovy/whelk/rest/api/UserDataAPI.groovy @@ -16,7 +16,7 @@ import java.util.stream.Collectors class UserDataAPI extends HttpServlet { private Whelk whelk private static final int POST_MAX_SIZE = 1000000 - private static final String ID_HASH_FUNCTION = "SHA-256" + static final String ID_HASH_FUNCTION = "SHA-256" UserDataAPI() { } diff --git a/rest/src/test/groovy/whelk/rest/api/CrudSpec.groovy b/rest/src/test/groovy/whelk/rest/api/CrudSpec.groovy index 507071560b..af4c77305f 100644 --- a/rest/src/test/groovy/whelk/rest/api/CrudSpec.groovy +++ b/rest/src/test/groovy/whelk/rest/api/CrudSpec.groovy @@ -817,8 +817,8 @@ class CrudSpec extends Specification { } storage.loadDocumentHistory(_) >> { [ - new DocumentVersion(new Document(['@graph': [['modified':'2022-02-02T12:00:00Z'], ['a': 'x']]]), "foo", ""), - new DocumentVersion(new Document(['@graph': [['modified':'2022-02-02T12:00:00Z'], ['a': 'y']]]), "bar", ""), + new DocumentVersion(new Document(['@graph': [['modified':'2022-02-02T12:00:00Z'], ['a': 'x']]]), "foo", ""), + new DocumentVersion(new Document(['@graph': [['modified':'2022-02-02T12:00:00Z'], ['a': 'y']]]), "bar", ""), ] } diff --git a/settings.gradle b/settings.gradle index 93a932f59c..9bf39950a2 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,4 +1,4 @@ rootProject.name = 'librisxl' include('apix_export', 'apix_server', 'batchimport', 'importers', 'marc_export', 'oaipmh', 'rest', 'whelk-core', 'whelktool', - 'gui-whelktool', 'trld-java', 'librisworks') + 'gui-whelktool', 'trld-java', 'librisworks', 'housekeeping') diff --git a/whelk-core/src/main/groovy/whelk/Whelk.groovy b/whelk-core/src/main/groovy/whelk/Whelk.groovy index cfe2f6b427..564543329a 100644 --- a/whelk-core/src/main/groovy/whelk/Whelk.groovy +++ b/whelk-core/src/main/groovy/whelk/Whelk.groovy @@ -22,6 +22,7 @@ import whelk.search.ElasticFind import whelk.util.PropertyLoader import whelk.util.Romanizer +import java.time.Instant import java.time.ZoneId /** @@ -258,7 +259,7 @@ class Whelk { return doc } - Map bulkLoad(Collection ids) { + Map bulkLoad(Collection ids, Instant asOf = null) { def idMap = [:] def otherIris = [] List systemIds = [] @@ -281,7 +282,7 @@ class Whelk { idMap.putAll(idToIri) } - return storage.bulkLoad(systemIds) + return storage.bulkLoad(systemIds, asOf) .findAll { id, doc -> !doc.deleted } .collectEntries { id, doc -> [(idMap.getOrDefault(id, id)): doc] } } diff --git a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy index f005b24a42..b6dd41bb8a 100644 --- a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy +++ b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy @@ -342,6 +342,9 @@ class PostgreSQLComponent { private static final String GET_COLLECTION_BY_SYSTEM_ID = "SELECT collection FROM lddb where id = ?" + private static final String GET_MAINENTITY_TYPE_BY_SYSTEM_ID = + "SELECT data#>>'{@graph,1,@type}' FROM lddb WHERE id = ?" + /** This query does the same as LOAD_COLLECTIONS = "SELECT DISTINCT collection FROM lddb" but much faster because postgres does not yet have 'loose indexscan' aka 'index skip scan' https://wiki.postgresql.org/wiki/Loose_indexscan' */ @@ -411,9 +414,22 @@ class PostgreSQLComponent { SELECT id FROM lddb WHERE data#>'{@graph,0,inDataset}' @> ?::jsonb AND deleted = false """.stripIndent() + private static final String GET_STATE = + "SELECT value FROM lddb__state WHERE key = ?" + + private static final String UPSERT_STATE = """ + INSERT INTO lddb__state (key, value) + VALUES (?, ?) + ON CONFLICT (key) DO UPDATE + SET (key, value) = (EXCLUDED.key, EXCLUDED.value) + """.stripIndent() + private static final String GET_USER_DATA = "SELECT data FROM lddb__user_data WHERE id = ?" + private static final String GET_ALL_USER_DATA = + "SELECT id, data FROM lddb__user_data" + private static final String UPSERT_USER_DATA = """ INSERT INTO lddb__user_data (id, data, modified) VALUES (?, ?, ?) @@ -430,6 +446,11 @@ class PostgreSQLComponent { JOIN lddb ON lddb__identifiers.id = lddb.id WHERE lddb__identifiers.iri = ? """.stripIndent() + private static final String GET_ALL_LIBRARIES_HOLDING_ID = """ + SELECT l.data#>>'{@graph,1,heldBy,@id}' FROM lddb__dependencies d + LEFT JOIN lddb l ON d.id = l.id + WHERE d.dependsonid = ? AND d.relation = 'itemOf'""" + private HikariDataSource connectionPool private HikariDataSource outerConnectionPool @@ -1406,7 +1427,28 @@ class PostgreSQLComponent { storeCard(cardEntry) return cardEntry.getCard().data } - + + List getAllLibrariesHolding(String id) { + return withDbConnection { + Connection connection = getMyConnection() + PreparedStatement preparedStatement = null + ResultSet rs = null + try { + preparedStatement = connection.prepareStatement(GET_ALL_LIBRARIES_HOLDING_ID) + preparedStatement.setString(1, id) + + rs = preparedStatement.executeQuery() + List results = [] + while(rs.next()) { + results.add(rs.getString(1)) + } + return results + } finally { + close(rs, preparedStatement) + } + } + } + void recalculateDependencies(Document doc) { withDbConnection { saveDependencies(doc, getMyConnection()) @@ -1867,6 +1909,32 @@ class PostgreSQLComponent { } } + String getMainEntityTypeBySystemID(String id) { + return withDbConnection { + Connection connection = getMyConnection() + return getMainEntityTypeBySystemID(id, connection) + } + } + + String getMainEntityTypeBySystemID(String id, Connection connection) { + PreparedStatement selectStatement = null + ResultSet resultSet = null + + try { + selectStatement = connection.prepareStatement(GET_MAINENTITY_TYPE_BY_SYSTEM_ID) + selectStatement.setString(1, id) + resultSet = selectStatement.executeQuery() + + if (resultSet.next()) { + return resultSet.getString(1) + } + return null + } + finally { + close(resultSet, selectStatement) + } + } + Document load(String id) { return load(id, null) } @@ -2619,6 +2687,72 @@ class PostgreSQLComponent { } } + void putState(String key, Map value) { + withDbConnection { + Connection connection = getMyConnection() + PreparedStatement preparedStatement = null + try { + PGobject jsonb = new PGobject() + jsonb.setType("jsonb") + jsonb.setValue( mapper.writeValueAsString(value) ) + + preparedStatement = connection.prepareStatement(UPSERT_STATE) + preparedStatement.setString(1, key) + preparedStatement.setObject(2, jsonb) + + preparedStatement.executeUpdate() + } finally { + close(preparedStatement) + } + } + } + + Map getState(String key) { + return withDbConnection { + Connection connection = getMyConnection() + PreparedStatement preparedStatement = null + ResultSet rs = null + try { + preparedStatement = connection.prepareStatement(GET_STATE) + preparedStatement.setString(1, key) + + rs = preparedStatement.executeQuery() + if (rs.next()) { + return mapper.readValue(rs.getString("value"), Map) + } + else { + return null + } + } finally { + close(rs, preparedStatement) + } + } + } + + /** + * Returns the user-data map for each user _with the user id_ also inserted into the map. + */ + List getAllUserData() { + return withDbConnection { + Connection connection = getMyConnection() + PreparedStatement preparedStatement = null + ResultSet rs = null + List result = [] + try { + preparedStatement = connection.prepareStatement(GET_ALL_USER_DATA) + rs = preparedStatement.executeQuery() + while (rs.next()) { + Map userdata = mapper.readValue(rs.getString("data"), Map) + userdata.put("id", rs.getString("id")) + result.add(userdata) + } + return result + } finally { + close(rs, preparedStatement) + } + } + } + String getUserData(String id) { return withDbConnection { Connection connection = getMyConnection()