-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into hotfix/null-point
# Conflicts: # search-commons/src/main/java/no/unit/nva/search/resource/SimplifiedMutator.java
- Loading branch information
Showing
150 changed files
with
7,538 additions
and
7,077 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
# style: Reformat project with Spotless | ||
2dfd69cbc977fa45922485f1d252d81fc0da7447 | ||
|
||
# style: Reformat project | ||
0825cca42191e030ef7c46cc9d86c8d004370d85 |
1 change: 0 additions & 1 deletion
1
batch-index/src/main/java/no/unit/nva/indexingclient/AggregationsValidator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
174 changes: 85 additions & 89 deletions
174
batch-index/src/main/java/no/unit/nva/indexingclient/BatchIndexer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,106 +1,102 @@ | ||
package no.unit.nva.indexingclient; | ||
|
||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
import no.unit.nva.identifiers.SortableIdentifier; | ||
import no.unit.nva.indexingclient.models.IndexDocument; | ||
import no.unit.nva.s3.ListingResult; | ||
import no.unit.nva.s3.S3Driver; | ||
|
||
import nva.commons.core.paths.UnixPath; | ||
|
||
import org.opensearch.action.bulk.BulkItemResponse; | ||
import org.opensearch.action.bulk.BulkItemResponse.Failure; | ||
import org.opensearch.action.bulk.BulkResponse; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import software.amazon.awssdk.services.s3.S3Client; | ||
|
||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
|
||
public class BatchIndexer implements IndexingResult<SortableIdentifier> { | ||
|
||
private static final Logger logger = LoggerFactory.getLogger(BatchIndexer.class); | ||
private final ImportDataRequestEvent importDataRequest; | ||
private final S3Driver s3Driver; | ||
private final IndexingClient openSearchRestClient; | ||
private final int numberOfFilesPerEvent; | ||
private IndexingResultRecord<SortableIdentifier> processingResult; | ||
|
||
public BatchIndexer( | ||
ImportDataRequestEvent importDataRequestEvent, | ||
S3Client s3Client, | ||
IndexingClient openSearchRestClient, | ||
int numberOfFilesPerEvent) { | ||
this.importDataRequest = importDataRequestEvent; | ||
this.openSearchRestClient = openSearchRestClient; | ||
this.s3Driver = new S3Driver(s3Client, importDataRequestEvent.getBucket()); | ||
this.numberOfFilesPerEvent = numberOfFilesPerEvent; | ||
} | ||
|
||
public IndexingResult<SortableIdentifier> processRequest() { | ||
|
||
ListingResult listFilesResult = fetchNextPageOfFilenames(); | ||
List<IndexDocument> contents = | ||
fileContents(listFilesResult.getFiles()).collect(Collectors.toList()); | ||
List<SortableIdentifier> failedResults = indexFileContents(contents); | ||
this.processingResult = | ||
new IndexingResultRecord<>( | ||
failedResults, | ||
listFilesResult.getListingStartingPoint(), | ||
listFilesResult.isTruncated()); | ||
|
||
return this; | ||
} | ||
|
||
private Stream<IndexDocument> fileContents(List<UnixPath> files) { | ||
return files.stream().map(s3Driver::getFile).map(IndexDocument::fromJsonString); | ||
} | ||
|
||
@Override | ||
public List<SortableIdentifier> failedResults() { | ||
return this.processingResult.failedResults(); | ||
} | ||
|
||
@Override | ||
public String nextStartMarker() { | ||
return processingResult.nextStartMarker(); | ||
} | ||
|
||
@Override | ||
public boolean truncated() { | ||
return this.processingResult.truncated(); | ||
} | ||
|
||
private ListingResult fetchNextPageOfFilenames() { | ||
return s3Driver.listFiles( | ||
UnixPath.of(importDataRequest.getS3Path()), | ||
importDataRequest.getStartMarker(), | ||
numberOfFilesPerEvent); | ||
} | ||
|
||
private List<SortableIdentifier> indexFileContents(List<IndexDocument> contents) { | ||
|
||
Stream<BulkResponse> result = openSearchRestClient.batchInsert(contents.stream()); | ||
List<SortableIdentifier> failures = collectFailures(result).collect(Collectors.toList()); | ||
failures.forEach(this::logFailure); | ||
return failures; | ||
} | ||
|
||
private <T> void logFailure(T failureMessage) { | ||
logger.warn("Failed to index resource:{}", failureMessage); | ||
} | ||
|
||
private Stream<SortableIdentifier> collectFailures(Stream<BulkResponse> indexActions) { | ||
return indexActions | ||
.filter(BulkResponse::hasFailures) | ||
.map(BulkResponse::getItems) | ||
.flatMap(Arrays::stream) | ||
.filter(BulkItemResponse::isFailed) | ||
.map(BulkItemResponse::getFailure) | ||
.map(Failure::getId) | ||
.map(SortableIdentifier::new); | ||
} | ||
private static final Logger logger = LoggerFactory.getLogger(BatchIndexer.class); | ||
private final ImportDataRequestEvent importDataRequest; | ||
private final S3Driver s3Driver; | ||
private final IndexingClient openSearchRestClient; | ||
private final int numberOfFilesPerEvent; | ||
private IndexingResultRecord<SortableIdentifier> processingResult; | ||
|
||
public BatchIndexer( | ||
ImportDataRequestEvent importDataRequestEvent, | ||
S3Client s3Client, | ||
IndexingClient openSearchRestClient, | ||
int numberOfFilesPerEvent) { | ||
this.importDataRequest = importDataRequestEvent; | ||
this.openSearchRestClient = openSearchRestClient; | ||
this.s3Driver = new S3Driver(s3Client, importDataRequestEvent.getBucket()); | ||
this.numberOfFilesPerEvent = numberOfFilesPerEvent; | ||
} | ||
|
||
public IndexingResult<SortableIdentifier> processRequest() { | ||
|
||
ListingResult listFilesResult = fetchNextPageOfFilenames(); | ||
List<IndexDocument> contents = | ||
fileContents(listFilesResult.getFiles()).collect(Collectors.toList()); | ||
List<SortableIdentifier> failedResults = indexFileContents(contents); | ||
this.processingResult = | ||
new IndexingResultRecord<>( | ||
failedResults, | ||
listFilesResult.getListingStartingPoint(), | ||
listFilesResult.isTruncated()); | ||
|
||
return this; | ||
} | ||
|
||
private Stream<IndexDocument> fileContents(List<UnixPath> files) { | ||
return files.stream().map(s3Driver::getFile).map(IndexDocument::fromJsonString); | ||
} | ||
|
||
@Override | ||
public List<SortableIdentifier> failedResults() { | ||
return this.processingResult.failedResults(); | ||
} | ||
|
||
@Override | ||
public String nextStartMarker() { | ||
return processingResult.nextStartMarker(); | ||
} | ||
|
||
@Override | ||
public boolean truncated() { | ||
return this.processingResult.truncated(); | ||
} | ||
|
||
private ListingResult fetchNextPageOfFilenames() { | ||
return s3Driver.listFiles( | ||
UnixPath.of(importDataRequest.getS3Path()), | ||
importDataRequest.getStartMarker(), | ||
numberOfFilesPerEvent); | ||
} | ||
|
||
private List<SortableIdentifier> indexFileContents(List<IndexDocument> contents) { | ||
|
||
Stream<BulkResponse> result = openSearchRestClient.batchInsert(contents.stream()); | ||
List<SortableIdentifier> failures = collectFailures(result).collect(Collectors.toList()); | ||
failures.forEach(this::logFailure); | ||
return failures; | ||
} | ||
|
||
private <T> void logFailure(T failureMessage) { | ||
logger.warn("Failed to index resource:{}", failureMessage); | ||
} | ||
|
||
private Stream<SortableIdentifier> collectFailures(Stream<BulkResponse> indexActions) { | ||
return indexActions | ||
.filter(BulkResponse::hasFailures) | ||
.map(BulkResponse::getItems) | ||
.flatMap(Arrays::stream) | ||
.filter(BulkItemResponse::isFailed) | ||
.map(BulkItemResponse::getFailure) | ||
.map(Failure::getId) | ||
.map(SortableIdentifier::new); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.