Skip to content

Commit

Permalink
Serve bulk reports (#1517)
Browse files Browse the repository at this point in the history
* Add bulk:execution with document counts and links to reports
* Serve bulk whelktool reports from housekeeping
  • Loading branch information
olovy authored Nov 14, 2024
1 parent a4c167d commit a3d0609
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 15 deletions.
56 changes: 55 additions & 1 deletion housekeeping/src/main/groovy/whelk/HouseKeepingServer.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,34 @@
package whelk;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.eclipse.jetty.ee8.servlet.DefaultServlet;
import org.eclipse.jetty.ee8.servlet.FilterHolder;
import org.eclipse.jetty.ee8.servlet.ServletContextHandler;
import org.eclipse.jetty.ee8.servlet.ServletHolder;
import org.eclipse.jetty.server.Server;
import whelk.datatool.bulkchange.BulkJob;
import whelk.housekeeping.BulkChangePreviewAPI;
import whelk.housekeeping.WebInterface;
import whelk.util.WhelkFactory;
import whelk.util.http.HttpTools;

import javax.servlet.DispatcherType;
import javax.servlet.FilterChain;
import javax.servlet.ServletException;
import javax.servlet.http.HttpFilter;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.EnumSet;

import static whelk.datatool.bulkchange.BulkJob.BULK_CONTEXT_PATH;
import static whelk.datatool.bulkchange.BulkJob.BULK_REPORTS_PATH;

public class HouseKeepingServer extends XlServer {
private final static Logger log = LogManager.getLogger(HouseKeepingServer.class);

@Override
protected void configureHandlers(Server server) {
ServletContextHandler context = new ServletContextHandler();
Expand All @@ -17,11 +39,43 @@ protected void configureHandlers(Server server) {
ServletHolder holder = new ServletHolder(WebInterface.class);
holder.setInitOrder(0);
context.addServlet(holder, "/");
context.addServlet(BulkChangePreviewAPI.class, "/_bulk-change/*");
context.addServlet(BulkChangePreviewAPI.class, BULK_CONTEXT_PATH + "/*");

serveBulkReports(context);

serveStaticContent(context);
}

/**
 * Exposes the bulk change report directory as static files under {@code BULK_REPORTS_PATH}.
 *
 * <p>Registers a {@link DefaultServlet} rooted at the canonical report base directory (with
 * {@code dirAllowed} set to {@code false}) and a request filter that answers 403 for any
 * requested file whose name is listed in {@link BulkJob#FORBIDDEN_REPORTS}.
 *
 * @param context the servlet context the servlet and filter are registered on
 * @throws RuntimeException if the canonical path of the report base directory cannot be resolved
 */
private static void serveBulkReports(ServletContextHandler context) {
    String dir;
    try {
        dir = BulkJob.reportBaseDir(WhelkFactory.getSingletonWhelk()).getCanonicalPath();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    ServletHolder holder = new ServletHolder("bulk-reports", DefaultServlet.class);
    holder.setInitParameter("resourceBase", dir);
    holder.setInitParameter("dirAllowed", "false");
    holder.setInitParameter("pathInfoOnly", "true");
    var path = BULK_REPORTS_PATH + "/*";
    context.addServlet(holder, path);

    context.addFilter(new FilterHolder(new HttpFilter() {
        @Override
        protected void doFilter(HttpServletRequest req, HttpServletResponse res, FilterChain chain) throws IOException, ServletException {
            var pathInfo = req.getPathInfo();
            // Path.getFileName() is null for a path without name elements (e.g. "/"),
            // so guard before calling toString() instead of failing with an NPE.
            var fileName = pathInfo == null ? null : Paths.get(pathInfo).getFileName();
            if (fileName != null && BulkJob.FORBIDDEN_REPORTS.contains(fileName.toString())) {
                HttpTools.sendError(res, 403, "Forbidden");
            } else {
                super.doFilter(req, res, chain);
            }
        }
    }), path, EnumSet.of(DispatcherType.REQUEST));

    log.info("Serving {} on {}", dir, path);
}

/**
 * Program entry point: creates a {@code HouseKeepingServer} and calls its {@code run()} method.
 *
 * @param args command line arguments (not used here)
 * @throws Exception whatever {@code run()} propagates
 */
public static void main(String[] args) throws Exception {
    HouseKeepingServer server = new HouseKeepingServer();
    server.run();
}
Expand Down
15 changes: 10 additions & 5 deletions whelktool/src/main/groovy/whelk/datatool/WhelkTool.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ class WhelkTool {
static final int DEFAULT_BATCH_SIZE = 500
public static final int DEFAULT_FETCH_SIZE = 100
static final int DEFAULT_STATS_NUM_IDS = 3
public static final String MAIN_LOG_NAME = "MAIN.txt"
public static final String ERROR_LOG_NAME = "ERRORS.txt"
public static final String MODIFIED_LOG_NAME = "MODIFIED.txt"
public static final String CREATED_LOG_NAME = "CREATED.txt"
public static final String DELETED_LOG_NAME = "DELETED.txt"

Whelk whelk
IdLoader idLoader
Expand Down Expand Up @@ -114,14 +119,14 @@ class WhelkTool {
this.defaultChangedBy = script.scriptJobUri
this.reportsDir = reportsDir
reportsDir.mkdirs()
mainLog = new PrintWriter(new File(reportsDir, "MAIN.txt"))
errorLog = new PrintWriter(new File(reportsDir, "ERRORS.txt"))
mainLog = new PrintWriter(new File(reportsDir, MAIN_LOG_NAME))
errorLog = new PrintWriter(new File(reportsDir, ERROR_LOG_NAME))

def modifiedLogFile = new File(reportsDir, "MODIFIED.txt")
def modifiedLogFile = new File(reportsDir, MODIFIED_LOG_NAME)
modifiedLog = new PrintWriter(modifiedLogFile)
def createdLogFile = new File(reportsDir, "CREATED.txt")
def createdLogFile = new File(reportsDir, CREATED_LOG_NAME)
createdLog = new PrintWriter(createdLogFile)
def deletedLogFile = new File(reportsDir, "DELETED.txt")
def deletedLogFile = new File(reportsDir, DELETED_LOG_NAME)
deletedLog = new PrintWriter(deletedLogFile)

try {
Expand Down
87 changes: 81 additions & 6 deletions whelktool/src/main/java/whelk/datatool/bulkchange/BulkJob.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,31 @@
import whelk.component.PostgreSQLComponent;
import whelk.datatool.Script;
import whelk.datatool.WhelkTool;
import whelk.datatool.bulkchange.BulkJobDocument.Status;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Consumer;
import java.util.stream.Stream;

import static whelk.Document.HASH_IT;
import static whelk.datatool.WhelkTool.CREATED_LOG_NAME;
import static whelk.datatool.WhelkTool.DELETED_LOG_NAME;
import static whelk.datatool.WhelkTool.MAIN_LOG_NAME;
import static whelk.datatool.WhelkTool.MODIFIED_LOG_NAME;
import static whelk.datatool.bulkchange.BulkJobDocument.JOB_TYPE;
import static whelk.datatool.bulkchange.BulkJobDocument.Status.Completed;
import static whelk.datatool.bulkchange.BulkJobDocument.Status.Failed;
Expand All @@ -27,16 +41,23 @@

public class BulkJob implements Runnable {
private static final Logger logger = Logger.getLogger(BulkJob.class);

public static final String BULK_CONTEXT_PATH = "/_bulk-change";
public static final String BULK_REPORTS_PATH = BULK_CONTEXT_PATH + "/reports";
public static final Set<String> FORBIDDEN_REPORTS = Set.of(MAIN_LOG_NAME);

protected static final String REPORTS_DIR = "bulk-change-reports";

protected final String id;
protected final String systemId;
protected final Whelk whelk;
protected final String executionId;

public BulkJob(Whelk whelk, String id) {
this.whelk = whelk;
this.id = id;
this.systemId = stripSuffix(stripPrefix(id, Document.getBASE_URI().toString()), Document.HASH_IT);
this.executionId = executionId(systemId);
}

@Override
Expand All @@ -61,12 +82,61 @@ public void run() {

tool.run();

storeUpdate(doc -> doc.setStatus(Completed));
if ((tool.getErrorDetected() != null)) {
finish(Failed);
} else {
finish(Completed);
}
} catch (Exception e) {
// TODO
logger.error(e);
System.err.println(e);
storeUpdate(doc -> doc.setStatus(Failed));
finish(Failed);
}
}

/**
 * Stores the final status of the job together with a record of this execution.
 *
 * <p>The record holds the end time (now, in the system default zone), the status, the links
 * to the non-forbidden reports and the line counts of the created/modified/deleted logs.
 *
 * @param status the status the job ended with
 */
private void finish(Status status) {
    storeUpdate(doc -> {
        doc.setStatus(status);

        // Gathered inside the updater, in the same order as the execution record fields.
        var endTime = ZonedDateTime.now(ZoneId.systemDefault());
        var reports = filteredReports();
        long created = lineCount(CREATED_LOG_NAME);
        long modified = lineCount(MODIFIED_LOG_NAME);
        long deleted = lineCount(DELETED_LOG_NAME);

        doc.addExecution(endTime, status, reports, created, modified, deleted);
    });
}

/**
 * Counts the lines of a report file in this job's report directory.
 *
 * @param reportName file name of the report, resolved against {@code reportDir()}
 * @return the number of lines, or 0 if the file does not exist or cannot be read
 */
private long lineCount(String reportName) {
    try (Stream<String> lines = Files.lines(new File(reportDir(), reportName).toPath(), StandardCharsets.UTF_8)) {
        return lines.count();
    } catch (FileNotFoundException | java.nio.file.NoSuchFileException ignored) {
        // A report that was never written has no lines. The NIO API signals a missing file
        // with NoSuchFileException rather than FileNotFoundException.
        return 0;
    } catch (IOException | java.io.UncheckedIOException e) {
        // Errors while reading (as opposed to opening) surface from count() as UncheckedIOException.
        logger.warn("Could not get line count", e);
        return 0;
    }
}

/**
 * Lists links to the reports of this execution that may be served and are not empty.
 *
 * <p>Regular files in the report directory are considered, except those named in
 * {@code FORBIDDEN_REPORTS} and those with a line count of 0. Each link has the form
 * {@code BULK_REPORTS_PATH/<report dir name>/<file name>}.
 *
 * @return the report links, or an empty list if the directory could not be listed
 */
private List<String> filteredReports() {
    try {
        File[] candidates = reportDir().listFiles(
                file -> file.isFile() && !FORBIDDEN_REPORTS.contains(file.getName()));

        // listFiles() yields null rather than throwing when the listing fails.
        if (candidates == null) {
            throw new IOException("Could not list files");
        }

        String linkPrefix = BULK_REPORTS_PATH + "/" + reportDir().getName() + "/";
        return Stream.of(candidates)
                .map(File::getName)
                .filter(name -> lineCount(name) > 0)
                .map(name -> linkPrefix + name)
                .toList();
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
        return Collections.emptyList();
    }
}

Expand All @@ -86,7 +156,8 @@ protected WhelkTool buildWhelkTool(BulkJobDocument jobDoc) throws IOException {

Script script = jobDoc.getSpecification().getScript(bulkJobThingId);

WhelkTool tool = new WhelkTool(whelk, script, reportDir(systemId), WhelkTool.getDEFAULT_STATS_NUM_IDS());
WhelkTool tool = new WhelkTool(whelk, script, reportDir(), WhelkTool.getDEFAULT_STATS_NUM_IDS());

// TODO for now setting changedBy only works for loud changes (!minorChange in PostgreSQLComponent)
tool.setDefaultChangedBy(jobDoc.getChangeAgentId());
tool.setAllowLoud(jobDoc.shouldUpdateModifiedTimestamp());
Expand All @@ -108,11 +179,15 @@ private void storeUpdate(Consumer<BulkJobDocument> updater) {
whelk.storeAtomicUpdate(systemId, minorUpdate, writeIdenticalVersions, changedIn, changedBy, updateAgent);
}

protected File reportDir(String baseName) throws IOException {
return new File(new File(whelk.getLogRoot(), REPORTS_DIR), reportLeafDir(baseName));
/**
 * Returns the report directory of this execution: the directory named after the execution id
 * inside the report base directory.
 *
 * @throws IOException if the report base directory cannot be determined
 */
protected File reportDir() throws IOException {
    File baseDir = reportBaseDir(whelk);
    return new File(baseDir, executionId);
}

/**
 * Returns the directory under which all bulk change reports are kept: {@code REPORTS_DIR}
 * inside the log root of the given Whelk.
 *
 * @param whelk the Whelk instance whose log root is used
 * @throws IOException declared for callers; presumably raised when the log root is unavailable
 */
public static File reportBaseDir(Whelk whelk) throws IOException {
    var logRoot = whelk.getLogRoot();
    return new File(logRoot, REPORTS_DIR);
}

protected static String reportLeafDir(String baseName) {
protected static String executionId(String baseName) {
String now = LocalDateTime
.now(ZoneId.systemDefault())
.truncatedTo(ChronoUnit.SECONDS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@
import whelk.util.DocumentUtil;
import whelk.util.JsonLdKey;

import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static whelk.JsonLd.ID_KEY;
import static whelk.JsonLd.asList;
import static whelk.util.JsonLdKey.fromKey;

// All terms are defined in https://github.com/libris/definitions/blob/develop/source/vocab/platform.ttl
Expand Down Expand Up @@ -66,12 +70,20 @@ public String key() {
public static final String DEPRECATE_KEY = "bulk:deprecate";
public static final String SCRIPT_KEY = "bulk:script";
public static final String RDF_VALUE = "value";
public static final String EXECUTION_KEY = "bulk:execution";
public static final String EXECUTION_TYPE = "bulk:Execution";
public static final String REPORT_KEY = "bulk:report";
public static final String END_TIME_KEY = "endTime";
public static final String NUM_CREATED_KEY = "bulk:numCreated";
public static final String NUM_UPDATED_KEY = "bulk:numUpdated";
public static final String NUM_DELETED_KEY = "bulk:numDeleted";

private static final List<Object> STATUS_PATH = List.of(JsonLd.GRAPH_KEY, 1, STATUS_KEY);
private static final List<Object> UPDATE_TIMESTAMP_PATH = List.of(JsonLd.GRAPH_KEY, 1, SHOULD_UPDATE_TIMESTAMP_KEY);
private static final List<Object> LABELS_PATH = List.of(JsonLd.GRAPH_KEY, 1, LABEL_KEY, "*");
private static final List<Object> COMMENTS_PATH = List.of(JsonLd.GRAPH_KEY, 1, COMMENT_KEY, "*");
private static final List<Object> SPECIFICATION_PATH = List.of(JsonLd.GRAPH_KEY, 1, CHANGE_SPEC_KEY);
private static final List<Object> EXECUTION_PATH = List.of(JsonLd.GRAPH_KEY, 1, EXECUTION_KEY);

public BulkJobDocument(Document doc) {
this(doc.data);
Expand Down Expand Up @@ -109,6 +121,31 @@ public Map<String, Object> getSpecificationRaw() {
return get(data, SPECIFICATION_PATH);
}

/**
 * Appends an execution record to the entries stored at {@code EXECUTION_PATH}.
 *
 * <p>The record always carries type, report links, end time (ISO offset date-time) and status.
 * The created/updated/deleted counts are only included when they are greater than zero.
 *
 * @param endTime     when the execution finished
 * @param status      the status the execution ended with
 * @param reportPaths paths of the reports, each stored as an {@code ID_KEY} link
 * @param numCreated  number of created documents
 * @param numUpdated  number of updated documents
 * @param numDeleted  number of deleted documents
 */
@SuppressWarnings("unchecked")
public void addExecution(ZonedDateTime endTime, Status status, List<String> reportPaths,
                         long numCreated, long numUpdated, long numDeleted) {
    var reportLinks = reportPaths.stream().map(p -> Map.of(ID_KEY, p)).toList();
    var formattedEndTime = endTime.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);

    Map<String, Object> execution = new HashMap<>(Map.of(
            JsonLd.TYPE_KEY, EXECUTION_TYPE,
            REPORT_KEY, reportLinks,
            END_TIME_KEY, formattedEndTime,
            STATUS_KEY, status.key()
    ));

    // Zero counts are left out of the record.
    if (numCreated > 0) {
        execution.put(NUM_CREATED_KEY, numCreated);
    }
    if (numUpdated > 0) {
        execution.put(NUM_UPDATED_KEY, numUpdated);
    }
    if (numDeleted > 0) {
        execution.put(NUM_DELETED_KEY, numDeleted);
    }

    // Presumably asList() yields a mutable list even when no execution is stored yet - verify.
    var executions = asList(get(data, EXECUTION_PATH));
    executions.add(execution);
    _set(EXECUTION_PATH, executions, data);
}

public Specification getSpecification() {
Map<String, Object> spec = getSpecificationRaw();
if (spec == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public void run() {
}

@Override
protected File reportDir(String baseName) throws IOException {
return new File(Files.createTempDirectory(REPORTS_DIR).toFile(), reportLeafDir(baseName));
/**
 * Returns a report directory named after the execution id inside a newly created temporary
 * directory (prefix {@code REPORTS_DIR}).
 *
 * <p>NOTE(review): every call creates another temporary directory, so repeated calls return
 * different locations - confirm that callers only invoke this once per job.
 *
 * @throws IOException if the temporary directory cannot be created
 */
protected File reportDir() throws IOException {
    File tempRoot = Files.createTempDirectory(REPORTS_DIR).toFile();
    return new File(tempRoot, executionId);
}
}
1 change: 0 additions & 1 deletion whelktool/src/main/java/whelk/datatool/util/IdLoader.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.Array;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
Expand Down

0 comments on commit a3d0609

Please sign in to comment.