Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug where ingestion failed for input document containing list of nested objects #1040

Merged
merged 3 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Implement pruning for neural sparse ingestion pipeline and two phase search processor ([#988](https://github.com/opensearch-project/neural-search/pull/988))
### Bug Fixes
- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
- Fix bug where ingestion failed for input document containing list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
### Infrastructure
### Documentation
### Maintenance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -419,23 +419,26 @@ private void putNLPResultToSourceMapForMapType(
if (sourceValue instanceof Map) {
for (Map.Entry<String, Object> inputNestedMapEntry : ((Map<String, Object>) sourceValue).entrySet()) {
if (sourceAndMetadataMap.get(processorKey) instanceof List) {
// build nlp output for list of nested objects
Iterator<Object> inputNestedMapValueIt = ((List<Object>) inputNestedMapEntry.getValue()).iterator();
for (Map<String, Object> nestedElement : (List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey)) {
// Only fill in when value is not null
if (inputNestedMapValueIt.hasNext() && inputNestedMapValueIt.next() != null) {
nestedElement.put(inputNestedMapEntry.getKey(), results.get(indexWrapper.index++));
}
if (inputNestedMapEntry.getValue() instanceof List) {
processMapEntryValue(
results,
indexWrapper,
(List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey),
inputNestedMapEntry.getKey(),
(List<Object>) inputNestedMapEntry.getValue()
);
} else if (inputNestedMapEntry.getValue() instanceof Map) {
heemin32 marked this conversation as resolved.
Show resolved Hide resolved
processMapEntryValue(
results,
indexWrapper,
(List<Map<String, Object>>) sourceAndMetadataMap.get(processorKey),
inputNestedMapEntry.getKey(),
inputNestedMapEntry.getValue()
);
}
} else {
Pair<String, Object> processedNestedKey = processNestedKey(inputNestedMapEntry);
Map<String, Object> sourceMap;
if (sourceAndMetadataMap.get(processorKey) == null) {
sourceMap = new HashMap<>();
sourceAndMetadataMap.put(processorKey, sourceMap);
} else {
sourceMap = (Map<String, Object>) sourceAndMetadataMap.get(processorKey);
}
Map<String, Object> sourceMap = getSourceMapBySourceAndMetadataMap(processorKey, sourceAndMetadataMap);
putNLPResultToSourceMapForMapType(
processedNestedKey.getKey(),
processedNestedKey.getValue(),
Expand All @@ -456,6 +459,97 @@ private void putNLPResultToSourceMapForMapType(
}
}

/**
 * Fills NLP results into a list of nested elements when the corresponding input value is a list.
 * Walks the nested elements and the input values in lockstep; for every nested element whose
 * paired input value exists and is non-null, the next entry of {@code results} is stored in that
 * element under {@code inputNestedMapEntryKey}.
 *
 * @param results NLP results, consumed in order through {@code indexWrapper}
 * @param indexWrapper holds the position of the next unread entry in {@code results}
 * @param sourceAndMetadataMapValueInList nested elements that receive the results
 * @param inputNestedMapEntryKey key under which each result is stored
 * @param inputNestedMapEntryValue input values paired positionally with the nested elements
 */
private void processMapEntryValue(
    List<?> results,
    IndexWrapper indexWrapper,
    List<Map<String, Object>> sourceAndMetadataMapValueInList,
    String inputNestedMapEntryKey,
    List<Object> inputNestedMapEntryValue
) {
    Iterator<Map<String, Object>> targetElements = sourceAndMetadataMapValueInList.iterator();
    Iterator<Object> inputValues = inputNestedMapEntryValue.iterator();
    while (targetElements.hasNext()) {
        Map<String, Object> targetElement = targetElements.next();
        // Input list ran out: nothing left to pair with this element
        if (!inputValues.hasNext()) {
            continue;
        }
        // A null input value has no result, so no entry of results is consumed for it
        if (inputValues.next() == null) {
            continue;
        }
        Object nlpResult = results.get(indexWrapper.index++);
        targetElement.put(inputNestedMapEntryKey, nlpResult);
    }
}

/**
 * Fills NLP results into a list of nested elements when the corresponding input value is not
 * handled by the list overload (the visible caller passes a map here). Each nested element is
 * processed in list order together with its position in the list.
 *
 * @param results NLP results, consumed in order through {@code indexWrapper}
 * @param indexWrapper holds the position of the next unread entry in {@code results}
 * @param sourceAndMetadataMapValueInList nested elements that receive the results
 * @param inputNestedMapEntryKey key of the input entry being processed
 * @param inputNestedMapEntryValue value of the input entry being processed
 */
private void processMapEntryValue(
    List<?> results,
    IndexWrapper indexWrapper,
    List<Map<String, Object>> sourceAndMetadataMapValueInList,
    String inputNestedMapEntryKey,
    Object inputNestedMapEntryValue
) {
    int elementPosition = 0;
    for (Map<String, Object> nestedElement : sourceAndMetadataMapValueInList) {
        // The position tells the callee which element of a list-valued input belongs to this nested element
        putNLPResultToSingleSourceMapInList(
            inputNestedMapEntryKey,
            inputNestedMapEntryValue,
            results,
            indexWrapper,
            nestedElement,
            elementPosition
        );
        elementPosition++;
    }
}

/**
* Put nlp result to single source element, which is in a list field of source document
* Such source element is in map type
*
* @param processorKey
* @param sourceValue
* @param results
* @param indexWrapper
* @param sourceAndMetadataMap
* @param nestedElementIndex index of the element in the list field of source document
*/
@SuppressWarnings("unchecked")
private void putNLPResultToSingleSourceMapInList(
String processorKey,
Object sourceValue,
List<?> results,
IndexWrapper indexWrapper,
Map<String, Object> sourceAndMetadataMap,
int nestedElementIndex
) {
if (processorKey == null || sourceAndMetadataMap == null || sourceValue == null) return;
if (sourceValue instanceof Map) {
heemin32 marked this conversation as resolved.
Show resolved Hide resolved
heemin32 marked this conversation as resolved.
Show resolved Hide resolved
for (Map.Entry<String, Object> inputNestedMapEntry : ((Map<String, Object>) sourceValue).entrySet()) {
Pair<String, Object> processedNestedKey = processNestedKey(inputNestedMapEntry);
Map<String, Object> sourceMap = getSourceMapBySourceAndMetadataMap(processorKey, sourceAndMetadataMap);
putNLPResultToSingleSourceMapInList(
processedNestedKey.getKey(),
processedNestedKey.getValue(),
results,
indexWrapper,
sourceMap,
nestedElementIndex
);
}
} else {
if (sourceValue instanceof List && ((List<Object>) sourceValue).get(nestedElementIndex) != null) {
sourceAndMetadataMap.merge(processorKey, results.get(indexWrapper.index++), REMAPPING_FUNCTION);
}
}
}

/**
 * Returns the map stored under {@code processorKey} in {@code sourceAndMetadataMap}. If the key
 * is absent or mapped to null, a new empty {@link HashMap} is stored under the key and returned.
 *
 * @param processorKey key to look up
 * @param sourceAndMetadataMap map to read from and, when needed, to register the new map in
 * @return the existing map for the key, or the newly registered empty map
 */
@SuppressWarnings("unchecked")
private Map<String, Object> getSourceMapBySourceAndMetadataMap(String processorKey, Map<String, Object> sourceAndMetadataMap) {
    Object existingValue = sourceAndMetadataMap.get(processorKey);
    if (existingValue != null) {
        return (Map<String, Object>) existingValue;
    }
    Map<String, Object> createdMap = new HashMap<>();
    sourceAndMetadataMap.put(processorKey, createdMap);
    return createdMap;
}

private List<Map<String, Object>> buildNLPResultForListType(List<String> sourceValue, List<?> results, IndexWrapper indexWrapper) {
List<Map<String, Object>> keyToResult = new ArrayList<>();
IntStream.range(0, sourceValue.size())
Expand Down
Loading
Loading