Skip to content

Commit

Permalink
Loading of git and dataset info
Browse files Browse the repository at this point in the history
  • Loading branch information
djtfmartin committed Jun 27, 2024
1 parent 0aa7db2 commit c83818a
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 39 deletions.
9 changes: 5 additions & 4 deletions matching-ws/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ RUN git checkout $BACKEND_BRANCH
RUN mvn install -DskipTests

# Store git commit id and log
RUN GIT_COMMIT_HASH=$(git rev-parse HEAD) && \
curl -o /app/backend/git.json -H "Accept: application/vnd.github+json" "https://api.github.com/repos/catalogueoflife/backend/commits/${GIT_COMMIT_HASH}"
RUN curl -o /app/backend/git.json -H "Accept: application/vnd.github+json" "https://api.github.com/repos/catalogueoflife/backend/commits/$(git rev-parse HEAD)"

# Run a script to validate the arguments
RUN if [ -n "$CLB_DATASET_ID" ]; then \
Expand Down Expand Up @@ -49,8 +48,8 @@ FROM openjdk:17.0.1-jdk-slim
ARG DEBIAN_FRONTEND=noninteractive
ENV SERVER_PORT=8080
ENV V1_ENABLED="false"
ARG USER=matching
ARG APP_ARTIFACT=matching-ws
ENV USER=matching
ENV APP_ARTIFACT=matching-ws

# Set environment variables
ARG CLB_DATASET_ID=""
Expand Down Expand Up @@ -86,6 +85,8 @@ RUN if [ -n "$CLB_DATASET_ID" ]; then \
--index.path=/data/$APP_ARTIFACT/index; \
fi

RUN chown -R $USER:$USER /opt/gbif/$APP_ARTIFACT

USER $USER
EXPOSE $SERVER_PORT

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ public IndexMetadata getIndexMetadata(){
metadata.setSizeInMB((usedSpace / 1024) / 1024);

metadata.setDatasetTitle((String) readDatasetInfo().getOrDefault("datasetTitle", null));
metadata.setGitInfo(readGitInfo());
metadata.setDatasetKey((String) readDatasetInfo().getOrDefault("datasetKey", null));
metadata.setBuildInfo(readGitInfo());

} catch (IOException e) {
e.printStackTrace();
Expand Down Expand Up @@ -125,14 +126,18 @@ public Map<String, Object> readGitInfo() {
JsonNode rootNode = mapper.readTree(new File(workingDir + "/git.json"));

// Read the commit-level fields, then navigate to the author node
String sha = rootNode.path("sha").asText();
String url = rootNode.path("url").asText();
String html_url = rootNode.path("html_url").asText();
String message = rootNode.path("commit").path("message").asText();
JsonNode authorNode = rootNode.path("commit").path("author");

// Retrieve author information
String name = authorNode.path("name").asText();
String email = authorNode.path("email").asText();
String date = authorNode.path("date").asText();

return Map.of("name", name, "email", email, "date", date);
return Map.of("sha", sha, "url", url, "html_url", html_url, "name", name, "email", email, "date", date, "message", message );
} catch (IOException e) {
e.printStackTrace();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
@JsonInclude(JsonInclude.Include.NON_EMPTY)
@Data
public class IndexMetadata {
Long datasetKey;
String datasetKey;
String datasetTitle;
String created;
Long sizeInMB = 0L;
Long taxonCount = 0L;
Map<String, Long> taxaByRankCount = new HashMap<>();
Map<String, Object> gitInfo = new HashMap<>();
Map<String, Object> buildInfo = new HashMap<>();
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,22 @@
import org.gbif.nameparser.api.Rank;
import org.gbif.nameparser.api.UnparsableNameException;
import org.gbif.nameparser.util.NameFormatter;

import org.jetbrains.annotations.NotNull;
import org.mybatis.spring.SqlSessionFactoryBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

/**
* Service to index a dataset from the Checklist Bank.
*/
@Service
public class IndexingService {

private static Logger LOG = LoggerFactory.getLogger(IndexingService.class);
private static final Logger LOG = LoggerFactory.getLogger(IndexingService.class);

@Value("${index.path:/tmp/matching-index}")
String indexPath;
Expand Down Expand Up @@ -124,8 +129,6 @@ public void writeCLBToFile(final String datasetKey) throws Exception {
LOG.info("Records written to file {}: {}", fileName, counter.get());
}



public static Directory newMemoryIndex(Iterable<NameUsage> usages) throws IOException {
LOG.info("Start building a new RAM index");
Directory dir = new ByteBuffersDirectory();
Expand Down Expand Up @@ -154,31 +157,28 @@ private static IndexWriter getIndexWriter(Directory dir) throws IOException {
public void indexFile(String exportPath, String indexPath) throws Exception {

// Create index directory
if (new File(indexPath).exists()) {
FileUtils.forceDelete(new File(indexPath));
}
Path indexDirectory = Paths.get(indexPath);
Path indexDirectory = initialiseIndexDirectory(indexPath);
Directory directory = FSDirectory.open(indexDirectory);

// Create index writer configuration
IndexWriterConfig config = getIndexWriterConfig();
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

// Set up a counter for the records read from the CSV export
LOG.info("Indexing dataset...");
LOG.info("Indexing dataset from CSV...");
final AtomicInteger counter = new AtomicInteger(0);

// FIXME - looks for csv files in the export path
final String filePath = exportPath + "/index.csv";

// FIXME - validate the file

// File source, String encoding, String delimiter, Character quotes, Integer headerRows
try (CSVReader reader = new CSVReader(new FileReader(filePath), ',', '"');
IndexWriter indexWriter = new IndexWriter(directory, config)) {

String[] row = reader.readNext();
while (row != null) {
if (row.length != 7) {
LOG.warn("Skipping row with invalid number of columns: {}", String.join(",", row));
row = reader.readNext();
continue;
}
NameUsage nameUsage =
NameUsage.builder()
.id(row[0])
Expand Down Expand Up @@ -209,10 +209,7 @@ public void runDatasetIndexing(final Integer datasetKey) throws Exception {
PooledDataSource dataSource = new PooledDataSource(clDriver, clbUrl, clbUser, clPassword);

// Create index directory
if (new File(indexPath).exists()) {
FileUtils.forceDelete(new File(indexPath));
}
Path indexDirectory = Paths.get(indexPath);
Path indexDirectory = initialiseIndexDirectory(indexPath);
Directory directory = FSDirectory.open(indexDirectory);

// Create index writer configuration
Expand Down Expand Up @@ -250,10 +247,16 @@ public void runDatasetIndexing(final Integer datasetKey) throws Exception {
LOG.info("Indexed: {}", counter.get());
}

/**
 * Clears the way for a fresh index by deleting whatever currently exists at
 * {@code indexPath}, then returns that location as a {@link Path}.
 *
 * <p>Nothing is created here; the method only removes existing content.
 *
 * @param indexPath file system location of the index
 * @return the {@link Path} corresponding to {@code indexPath}
 * @throws IOException propagated from {@code FileUtils.forceDelete} when existing content
 *     cannot be removed
 */
private @NotNull Path initialiseIndexDirectory(String indexPath) throws IOException {
  File existingIndex = new File(indexPath);
  if (existingIndex.exists()) {
    // Remove the previous index before a new one is written to the same location.
    FileUtils.forceDelete(existingIndex);
  }
  return Paths.get(indexPath);
}

protected static Document toDoc(NameUsage nameUsage) {

Document doc = new Document();

/*
Porting notes: The canonical name *sensu stricto* with nothing else but three name parts at
most (genus, species, infraspecific). No rank or hybrid markers and no authorship,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class MatchingApplication implements ApplicationRunner {
@Override
public void run(ApplicationArguments args) {
// generate the index metadata if not present
matchingService.getIndexMetadata();
LOG.info("Web services started");
IndexMetadata metadata = matchingService.getIndexMetadata();
LOG.info("Web services started. Index size: {} taxa", metadata.getTaxonCount());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ private static boolean any(String... x) {

private static void appendIfExists(StringBuilder sb, @Nullable String x) {
if (exists(x)) {
if (sb.length() > 0) {
if (!sb.isEmpty()) {
sb.append(" ");
}
sb.append(x.trim());
Expand Down Expand Up @@ -266,8 +266,7 @@ private static Rank lowestRank(LinneanClassification cl) {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof NameNRank)) return false;
NameNRank nameNRank = (NameNRank) o;
if (!(o instanceof NameNRank nameNRank)) return false;
return Objects.equals(name, nameNRank.name) && rank == nameNRank.rank;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,6 @@ public static URL url(String... path) {
return null;
}

/**
 * Builds the URL of a file in the dictionary folder of rs.gbif.org.
 *
 * @param filename name of the requested dictionary file
 * @return URL of the file inside the dictionary folder of rs.gbif.org, exactly as
 *     produced by {@link #url(String...)}
 */
public static URL dictionaryUrl(String filename) {
  final URL dictionaryFileUrl = url(DICT_DIR, filename);
  return dictionaryFileUrl;
}

/**
* @param filename of authority dictionary file requested
* @return URL of the file inside the authority folder of rs.gbif.org
Expand Down

0 comments on commit c83818a

Please sign in to comment.