Skip to content

Commit

Permalink
Merge pull request IQSS#10598 from Recherche-Data-Gouv/8941-adding-fi…
Browse files Browse the repository at this point in the history
…leCount-in-solr

8941 adding file count in solr (v2)
  • Loading branch information
ofahimIQSS authored Nov 22, 2024
2 parents b8e4758 + aa3f855 commit f95c1a0
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 14 deletions.
1 change: 1 addition & 0 deletions conf/solr/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@
<field name="datasetValid" type="boolean" stored="true" indexed="true" multiValued="false"/>

<field name="license" type="string" stored="true" indexed="true" multiValued="false"/>
<field name="fileCount" type="plong" stored="true" indexed="true" multiValued="false"/>

<!--
METADATA SCHEMA FIELDS
Expand Down
15 changes: 15 additions & 0 deletions doc/release-notes/8941-adding-fileCount-in-solr.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
## Release Highlights

### Adding `fileCount` as a Solr field

A new Solr field called `fileCount` stores the number of files in each dataset, so datasets can now be searched by how many files they contain. (#10598)

## Upgrade Instructions

1. Update your Solr `schema.xml` to include the new field.
For details, please see https://guides.dataverse.org/en/latest/admin/metadatacustomization.html#updating-the-solr-schema

2. Reindex Solr.
Once `schema.xml` has been updated, restart Solr and then start a reindex.
For details, see https://guides.dataverse.org/en/latest/admin/solr-search-index.html; the reindex command is:
`curl http://localhost:8080/api/admin/index`
11 changes: 1 addition & 10 deletions src/main/java/edu/harvard/iq/dataverse/api/Search.java
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public Response search(
JsonArrayBuilder itemsArrayBuilder = Json.createArrayBuilder();
List<SolrSearchResult> solrSearchResults = solrQueryResponse.getSolrSearchResults();
for (SolrSearchResult solrSearchResult : solrSearchResults) {
itemsArrayBuilder.add(solrSearchResult.json(showRelevance, showEntityIds, showApiUrls, metadataFields, getDatasetFileCount(solrSearchResult)));
itemsArrayBuilder.add(solrSearchResult.json(showRelevance, showEntityIds, showApiUrls, metadataFields));
}

JsonObjectBuilder spelling_alternatives = Json.createObjectBuilder();
Expand Down Expand Up @@ -229,15 +229,6 @@ public Response search(
}
}

/**
 * Looks up the file metadata count for the dataset version behind a search hit.
 *
 * @param solrSearchResult the search hit whose entity and dataset version id are inspected
 * @return the value reported by {@code datasetVersionFilesServiceBean.getFileMetadataCount}
 *         for the hit's dataset version, or {@code null} when the hit's entity is not a dataset
 */
private Long getDatasetFileCount(SolrSearchResult solrSearchResult) {
    DvObject entity = solrSearchResult.getEntity();
    // Anything other than a dataset has no file count to report.
    if (!entity.isInstanceofDataset()) {
        return null;
    }
    Dataset dataset = (Dataset) entity;
    DatasetVersion version = dataset.getVersionFromId(solrSearchResult.getDatasetVersionId());
    return datasetVersionFilesServiceBean.getFileMetadataCount(version);
}

private User getUser(ContainerRequestContext crc) throws WrappedResponse {
User userToExecuteSearchAs = GuestUser.get();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ public class IndexServiceBean {
@EJB
DatasetFieldServiceBean datasetFieldService;

@Inject
DatasetVersionFilesServiceBean datasetVersionFilesServiceBean;

public static final String solrDocIdentifierDataverse = "dataverse_";
public static final String solrDocIdentifierFile = "datafile_";
public static final String solrDocIdentifierDataset = "dataset_";
Expand Down Expand Up @@ -1018,6 +1021,8 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));

solrInputDocument.addField(SearchFields.FILE_COUNT, datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion));

if (datasetVersion.isInReview()) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -291,5 +291,6 @@ more targeted results for just datasets. The format is YYYY (i.e.
public static final String DATASET_VALID = "datasetValid";

public static final String DATASET_LICENSE = "license";
public static final String FILE_COUNT = "fileCount";

}
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,8 @@ public SolrQueryResponse search(
Long retentionEndDate = (Long) solrDocument.getFieldValue(SearchFields.RETENTION_END_DATE);
//
Boolean datasetValid = (Boolean) solrDocument.getFieldValue(SearchFields.DATASET_VALID);

Long fileCount = (Long) solrDocument.getFieldValue(SearchFields.FILE_COUNT);

List<String> matchedFields = new ArrayList<>();

SolrSearchResult solrSearchResult = new SolrSearchResult(query, name);
Expand Down Expand Up @@ -570,6 +571,7 @@ public SolrQueryResponse search(
solrSearchResult.setDeaccessionReason(deaccessionReason);
solrSearchResult.setDvTree(dvTree);
solrSearchResult.setDatasetValid(datasetValid);
solrSearchResult.setFileCount(fileCount);

if (Boolean.TRUE.equals((Boolean) solrDocument.getFieldValue(SearchFields.IS_HARVESTED))) {
solrSearchResult.setHarvested(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ public class SolrSearchResult {
private String citation;
private String citationHtml;
private String datasetType;
/**
* Only datasets have a file count.
*/
private Long fileCount;
/**
* Files and datasets might have a UNF. Dataverses don't.
*/
Expand Down Expand Up @@ -456,10 +460,10 @@ public JsonObjectBuilder getJsonForMyData(boolean isValid) {
} // getJsonForMydata

public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls) {
return json(showRelevance, showEntityIds, showApiUrls, null, null);
return json(showRelevance, showEntityIds, showApiUrls, null);
}

public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls, List<String> metadataFields, Long datasetFileCount) {
public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls, List<String> metadataFields) {
if (this.type == null) {
return jsonObjectBuilder();
}
Expand Down Expand Up @@ -597,7 +601,7 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool
subjects.add(subject);
}
nullSafeJsonBuilder.add("subjects", subjects);
nullSafeJsonBuilder.add("fileCount", datasetFileCount);
nullSafeJsonBuilder.add("fileCount", this.fileCount);
nullSafeJsonBuilder.add("versionId", dv.getId());
nullSafeJsonBuilder.add("versionState", dv.getVersionState().toString());
if (this.isPublishedState()) {
Expand Down Expand Up @@ -1348,4 +1352,12 @@ public boolean isValid(Predicate<SolrSearchResult> canUpdateDataset) {
}
return !canUpdateDataset.test(this);
}

/**
 * Returns the file count stored on this search result.
 *
 * @return the stored file count; {@code null} if none has been set
 */
public Long getFileCount() {
    return this.fileCount;
}

/**
 * Stores the file count on this search result.
 *
 * @param fileCount the file count to store; assigned as-is, without validation
 */
public void setFileCount(Long fileCount) {
this.fileCount = fileCount;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public void setUp() {
indexService.dataverseService = Mockito.mock(DataverseServiceBean.class);
indexService.datasetFieldService = Mockito.mock(DatasetFieldServiceBean.class);
indexService.datasetVersionService = Mockito.mock(DatasetVersionServiceBean.class);
indexService.datasetVersionFilesServiceBean = Mockito.mock(DatasetVersionFilesServiceBean.class);
BrandingUtil.injectServices(indexService.dataverseService, indexService.settingsService);

Mockito.when(indexService.dataverseService.findRootDataverse()).thenReturn(dataverse);
Expand Down

0 comments on commit f95c1a0

Please sign in to comment.