Skip to content

Commit

Permalink
Updated logic for generate backfill plugin
Browse files Browse the repository at this point in the history
Signed-off-by: Alexandre Flores <[email protected]>
  • Loading branch information
SugaryLump committed Oct 25, 2024
1 parent c9206ae commit 50e4ffb
Show file tree
Hide file tree
Showing 18 changed files with 373 additions and 810 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1427,8 +1427,9 @@ public enum OrchestratorType {

// Generate Backfill Plugin Parameters

public static final String PLUGIN_PARAMS_BLOCK_SIZE = "parameter.block_size";
public static final String PLUGIN_PARAMS_VALIDATE_AGAINST = "parameter.validate_against";
public static final String PLUGIN_PARAMS_OUTPUT_DIRECTORY = "parameter.output_directory";
public static final String PLUGIN_PARAMS_ONLY_GENERATE_INVENTORY = "parameter.only_generate_inventory";
public static final String PLUGIN_PARAMS_START_DATE = "parameter.start_date";

public static final String PLUGIN_CATEGORY_CONVERSION = "conversion";
public static final String PLUGIN_CATEGORY_CHARACTERIZATION = "characterization";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.sax.SAXSource;

import jakarta.xml.bind.JAXBElement;
import org.apache.commons.io.IOUtils;
import org.roda.core.data.common.RodaConstants;
import org.roda.core.data.exceptions.GenericException;
Expand Down Expand Up @@ -50,6 +51,23 @@ public static String getXMLFromObject(Object object) throws GenericException {
return ret;
}

/**
 * Marshals a JAXB element into an XML fragment. The marshaller is configured
 * with {@link Marshaller#JAXB_FRAGMENT}, so the output is meant to be embedded
 * in a larger document rather than used as a standalone one.
 *
 * @param object
 *          the JAXB element to serialize
 * @return the marshalled XML as a string
 * @throws GenericException
 *           if the JAXB context cannot be created or marshalling fails
 */
public static String getXMLFragFromObject(JAXBElement<?> object) throws GenericException {
  try {
    // An element without a value (e.g. a nil element) has no runtime class to
    // inspect; fall back to its declared type instead of failing with a
    // NullPointerException that callers do not expect.
    Object value = object.getValue();
    Class<?> boundClass = value != null ? value.getClass() : object.getDeclaredType();

    Marshaller marshaller = JAXBContext.newInstance(boundClass).createMarshaller();
    marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);

    StringWriter writer = new StringWriter();
    marshaller.marshal(object, writer);
    return writer.toString();
  } catch (JAXBException e) {
    throw new GenericException(e);
  }
}

public static <T> T getObjectFromXML(InputStream xml, Class<T> objectClass) throws GenericException {
T ret;
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
package org.roda.core.plugins.base.maintenance.backfill;

import org.roda.core.data.exceptions.AuthorizationDeniedException;
import org.roda.core.data.exceptions.GenericException;
import org.roda.core.data.exceptions.NotFoundException;
import org.roda.core.data.exceptions.RequestNotValidException;
import java.util.List;

import org.roda.core.data.v2.index.IsIndexed;
import org.roda.core.data.v2.ip.AIP;
import org.roda.core.data.v2.ip.IndexedAIP;
import org.roda.core.model.ModelService;
import org.roda.core.plugins.Plugin;

import java.util.List;

/**
* @author Alexandre Flores <[email protected]>
*/
Expand All @@ -22,11 +17,6 @@ protected <I extends IsIndexed> Class<I> getIndexClass() {
return (Class<I>) IndexedAIP.class;
}

@Override
protected AIP retrieveModelObject(ModelService model, String id) throws AuthorizationDeniedException, NotFoundException, GenericException, RequestNotValidException {
return model.retrieveAIP(id);
}

@Override
public Plugin<AIP> cloneMe() {
return new GenerateAIPBackfillPlugin();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,111 +2,28 @@

import java.util.List;

import org.roda.core.data.common.RodaConstants;
import org.roda.core.data.v2.LiteOptionalWithCause;
import org.roda.core.data.v2.Void;
import org.roda.core.data.v2.index.IsIndexed;
import org.roda.core.data.v2.jobs.PluginType;
import org.roda.core.data.v2.jobs.Report;
import org.roda.core.data.v2.log.LogEntry;
import org.roda.core.index.IndexService;
import org.roda.core.model.ModelService;
import org.roda.core.plugins.AbstractPlugin;
import org.roda.core.plugins.Plugin;
import org.roda.core.plugins.PluginException;
import org.roda.core.storage.StorageService;

/**
* @author Alexandre Flores <[email protected]>
*/
public class GenerateActionLogBackfillPlugin extends AbstractPlugin<Void> {
@Override
public String getName() {
return "Generate complete action log index backfill";
}

@Override
public String getDescription() {
return "";
}

@Override
public RodaConstants.PreservationEventType getPreservationEventType() {
return null;
}
public class GenerateActionLogBackfillPlugin extends GenerateRODAEntityBackfillPlugin<LogEntry> {

@Override
public String getPreservationEventDescription() {
return "";
protected <I extends IsIndexed> Class<I> getIndexClass() {
return (Class<I>) LogEntry.class;
}

@Override
public String getPreservationEventSuccessMessage() {
return "";
}

@Override
public String getPreservationEventFailureMessage() {
return "";
}

@Override
public String getPreservationEventSkippedMessage() {
return super.getPreservationEventSkippedMessage();
}

@Override
public PluginType getType() {
return PluginType.MISC;
}

@Override
public List<String> getCategories() {
return List.of();
}

@Override
public Plugin<Void> cloneMe() {
return new GenerateActionLogBackfillPlugin();
}

@Override
public boolean areParameterValuesValid() {
return false;
}

@Override
public void init() throws PluginException {

}

@Override
public List<Class<Void>> getObjectClasses() {
return List.of(Void.class);
}

@Override
public Report beforeAllExecute(IndexService index, ModelService model, StorageService storage) throws PluginException {
return null;
}

@Override
public Report execute(IndexService index, ModelService model, StorageService storage, List<LiteOptionalWithCause> list) throws PluginException {
return null;
}

@Override
public Report afterAllExecute(IndexService index, ModelService model, StorageService storage) throws PluginException {
return null;
}

@Override
public void shutdown() {

public Plugin<LogEntry> cloneMe() {
return new GenerateActionLogBackfillPlugin();
}

@Override
public String getVersionImpl() {
return "";
public List<Class<LogEntry>> getObjectClasses() {
return List.of(LogEntry.class);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
*/
package org.roda.core.plugins.base.maintenance.backfill;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

Expand All @@ -23,17 +25,16 @@
import org.roda.core.data.v2.IsRODAObject;
import org.roda.core.data.v2.LiteOptionalWithCause;
import org.roda.core.data.v2.Void;
import org.roda.core.data.v2.index.filter.DateIntervalFilterParameter;
import org.roda.core.data.v2.index.filter.Filter;
import org.roda.core.data.v2.index.select.SelectedItems;
import org.roda.core.data.v2.index.select.SelectedItemsAll;
import org.roda.core.data.v2.index.select.SelectedItemsNone;
import org.roda.core.data.v2.ip.TransferredResource;
import org.roda.core.data.v2.ip.metadata.IndexedPreservationAgent;
import org.roda.core.data.v2.index.select.SelectedItemsFilter;
import org.roda.core.data.v2.jobs.Job;
import org.roda.core.data.v2.jobs.PluginParameter;
import org.roda.core.data.v2.jobs.PluginState;
import org.roda.core.data.v2.jobs.PluginType;
import org.roda.core.data.v2.jobs.Report;
import org.roda.core.data.v2.log.LogEntry;
import org.roda.core.data.v2.user.RODAMember;
import org.roda.core.index.IndexService;
import org.roda.core.model.ModelService;
import org.roda.core.plugins.AbstractPlugin;
Expand All @@ -50,18 +51,33 @@
public class GenerateAllRODAEntitiesBackfillPlugin extends AbstractPlugin<Void> {

private static final Logger LOGGER = LoggerFactory.getLogger(GenerateAllRODAEntitiesBackfillPlugin.class);
private int blockSize = 100000;
private HashMap<String, HashSet<String>> initialIdsManifest = new HashMap<>();
private HashMap<String, HashSet<String>> processedIdsManifest = new HashMap<>();
private String outputDirectory = ".";
private boolean onlyGenerateInventory = false;
private Date startDate = null;

private static Map<String, PluginParameter> pluginParameters = new HashMap<>();
private static final Map<String, PluginParameter> pluginParameters = new HashMap<>();

static {
pluginParameters.put(RodaConstants.PLUGIN_PARAMS_BLOCK_SIZE,
pluginParameters.put(RodaConstants.PLUGIN_PARAMS_OUTPUT_DIRECTORY,
PluginParameter
.getBuilder(RodaConstants.PLUGIN_PARAMS_BLOCK_SIZE, "Block size", PluginParameter.PluginParameterType.INTEGER)
.withDefaultValue("100000").isMandatory(false)
.withDescription("Number of documents in each index documents block.").build());
.getBuilder(RodaConstants.PLUGIN_PARAMS_OUTPUT_DIRECTORY, "Output directory",
PluginParameter.PluginParameterType.STRING)
.withDefaultValue(".").isMandatory(true).withDescription("This job's output directory path").build());
pluginParameters.put(RodaConstants.PLUGIN_PARAMS_ONLY_GENERATE_INVENTORY,
PluginParameter
.getBuilder(RodaConstants.PLUGIN_PARAMS_ONLY_GENERATE_INVENTORY, "Only generate inventory",
PluginParameter.PluginParameterType.BOOLEAN)
.withDefaultValue("false").isMandatory(true)
.withDescription(
"Whether this job should only generate the inventory of RODA objects and not the index backfill files")
.build());
// Optional parameter; setParameterValues parses its value with the pattern yyyy-MM-dd.
pluginParameters.put(RodaConstants.PLUGIN_PARAMS_START_DATE,
  PluginParameter
    .getBuilder(RodaConstants.PLUGIN_PARAMS_START_DATE, "Object starting date",
      PluginParameter.PluginParameterType.STRING)
    .isMandatory(false).withDescription(
      "The last modified date for source objects to process. If not set, all objects will be processed.")
    .build());
}

@Override
Expand Down Expand Up @@ -98,15 +114,28 @@ public String getVersionImpl() {
@Override
public List<PluginParameter> getParameters() {
ArrayList<PluginParameter> parameters = new ArrayList<>();
parameters.add(pluginParameters.get(RodaConstants.PLUGIN_PARAMS_BLOCK_SIZE));
parameters.add(pluginParameters.get(RodaConstants.PLUGIN_PARAMS_OUTPUT_DIRECTORY));
parameters.add(pluginParameters.get(RodaConstants.PLUGIN_PARAMS_ONLY_GENERATE_INVENTORY));
parameters.add(pluginParameters.get(RodaConstants.PLUGIN_PARAMS_START_DATE));
return parameters;
}

@Override
public void setParameterValues(Map<String, String> parameters) throws InvalidParameterException {
super.setParameterValues(parameters);
if (parameters != null && parameters.containsKey(RodaConstants.PLUGIN_PARAMS_BLOCK_SIZE)) {
blockSize = Integer.parseInt(parameters.get(RodaConstants.PLUGIN_PARAMS_BLOCK_SIZE));
if (parameters != null && parameters.containsKey(RodaConstants.PLUGIN_PARAMS_OUTPUT_DIRECTORY)) {
outputDirectory = parameters.get(RodaConstants.PLUGIN_PARAMS_OUTPUT_DIRECTORY);
}
if (parameters != null && parameters.containsKey(RodaConstants.PLUGIN_PARAMS_ONLY_GENERATE_INVENTORY)) {
onlyGenerateInventory = Boolean.parseBoolean(parameters.get(RodaConstants.PLUGIN_PARAMS_ONLY_GENERATE_INVENTORY));
}
if (parameters != null && parameters.containsKey(RodaConstants.PLUGIN_PARAMS_START_DATE)) {
SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
try {
startDate = formatter.parse(parameters.get(RodaConstants.PLUGIN_PARAMS_START_DATE));
} catch (ParseException e) {
throw new InvalidParameterException(e);
}
}
}

Expand Down Expand Up @@ -141,7 +170,6 @@ protected Report generateRODAObjectBackfill(ModelService model, Class<? extends
String jobId = IdUtils.createUUID();
String jobName = "Generate index backfill for RODA entity (" + clazz.getSimpleName() + ")";
report = PluginHelper.initPluginReportItem(this, jobId, Job.class);

try {
String username = PluginHelper.getJobUsername(this, model);
Job job = initGenerateBackfillJob(clazz, jobId, jobName, username);
Expand All @@ -165,14 +193,28 @@ protected <T extends IsRODAObject> Job initGenerateBackfillJob(Class<T> clazz, S
job.setId(jobId);
job.setName(jobName);

Map<String, String> pluginParameters = new HashMap<>();
pluginParameters.put(RodaConstants.PLUGIN_PARAMS_BLOCK_SIZE, String.valueOf(blockSize));
job.setPluginParameters(pluginParameters);
Map<String, String> localPluginParameters = new HashMap<>();
localPluginParameters.put(RodaConstants.PLUGIN_PARAMS_OUTPUT_DIRECTORY, outputDirectory);
localPluginParameters.put(RodaConstants.PLUGIN_PARAMS_ONLY_GENERATE_INVENTORY,
String.valueOf(onlyGenerateInventory));
job.setPluginParameters(localPluginParameters);
job.setPluginType(PluginType.MISC);
job.setUsername(username);

job.setPlugin(GenerateBackfillPluginUtils.getGeneratedBackfillPluginName(clazz));
job.setSourceObjects(SelectedItemsAll.create(clazz));
SelectedItems<?> selectedItems;
if (startDate != null) {
SelectedItemsFilter<?> selectedItemsFilter = new SelectedItemsFilter<>();
Filter filter = new Filter();
DateIntervalFilterParameter dateIntervalFilterParameter = new DateIntervalFilterParameter();
dateIntervalFilterParameter.setFromValue(startDate);
filter.add(dateIntervalFilterParameter);
selectedItemsFilter.setFilter(filter);
selectedItems = selectedItemsFilter;
} else {
selectedItems = SelectedItemsAll.create(clazz);
}
job.setSourceObjects(selectedItems);

return job;
}
Expand Down Expand Up @@ -209,28 +251,13 @@ public String getPreservationEventFailureMessage() {

/**
 * Hook invoked before the plugin executes; this plugin performs no
 * preparation here.
 *
 * @param index
 *          unused
 * @param model
 *          unused
 * @param storage
 *          unused
 * @return always {@code null}
 */
@Override
public Report beforeAllExecute(IndexService index, ModelService model, StorageService storage) {
// do nothing
return null;
}

/**
 * Hook invoked after the plugin executes; this plugin performs no clean-up
 * here.
 *
 * @param index
 *          unused
 * @param model
 *          unused
 * @param storage
 *          unused
 * @return always {@code null}
 */
@Override
public Report afterAllExecute(IndexService index, ModelService model, StorageService storage) {

// do nothing
return null;
}

Expand Down
Loading

0 comments on commit 50e4ffb

Please sign in to comment.