Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8623 solr index vocab #8624

Merged
merged 6 commits into from
May 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/dataverse-parent/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@
<!-- Major system components and dependencies -->
<payara.version>5.2021.6</payara.version>
<postgresql.version>42.3.3</postgresql.version>
<solr.version>8.8.1</solr.version>
<solr.version>8.11.1</solr.version>
<aws.version>1.11.762</aws.version>
<google.cloud.version>0.157.0</google.cloud.version>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ public Future<String> indexDataverse(Dataverse dataverse, boolean processPaths)
for(String locale: langs) {
solrInputDocument.addField(SearchFields.DATAVERSE_SUBJECT, dataverseSubject.getLocaleStrValue(locale));
}
if (langs.isEmpty()) {
solrInputDocument.addField(SearchFields.DATAVERSE_SUBJECT, dataverseSubject.getStrValue());
}

// collapse into shared "subject" field used as a facet
solrInputDocument.addField(SearchFields.SUBJECT, subject);
}
Expand Down Expand Up @@ -726,8 +730,8 @@ private IndexResponse indexDatasetPermissions(Dataset dataset) {
private String addOrUpdateDataset(IndexableDataset indexableDataset) throws SolrServerException, IOException {
return addOrUpdateDataset(indexableDataset, null);
}
private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> datafilesInDraftVersion) throws SolrServerException, IOException {

public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long> datafilesInDraftVersion) throws SolrServerException, IOException {
IndexableDataset.DatasetState state = indexableDataset.getDatasetState();
Dataset dataset = indexableDataset.getDatasetVersion().getDataset();
logger.fine("adding or updating Solr document for dataset id " + dataset.getId());
Expand Down Expand Up @@ -898,10 +902,16 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) {
continue;
}

// Index in all used languages (display and metadata languages)
for(String locale: langs) {
solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getLocaleStrValue(locale));
if (!dsfType.isAllowMultiples() || langs.isEmpty()) {
solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue());
} else {
for(String locale: langs) {
solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getLocaleStrValue(locale));
}
}

if (dsfType.getSolrField().isFacetable()) {
solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue());
}
Expand Down Expand Up @@ -1288,9 +1298,16 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
solrInputDocument.addField(SearchFields.EMBARGO_END_DATE, embargoEndDate.toEpochDay());
}
}
Long datasetId = dataset.getId();
final String msg = "indexed dataset " + datasetId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
return new SolrInputDocuments(docs, msg, datasetId);
}

private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> datafilesInDraftVersion) throws SolrServerException, IOException {
final SolrInputDocuments docs = toSolrDocs(indexableDataset, datafilesInDraftVersion);

try {
solrClientService.getSolrClient().add(docs);
solrClientService.getSolrClient().add(docs.getDocuments());
solrClientService.getSolrClient().commit();
} catch (SolrServerException | IOException ex) {
if (ex.getCause() instanceof SolrServerException) {
Expand All @@ -1299,19 +1316,17 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
throw new IOException(ex);
}
}

Long dsId = dataset.getId();
/// Dataset updatedDataset =
/// (Dataset)dvObjectService.updateContentIndexTime(dataset);
/// updatedDataset = null;
// instead of making a call to dvObjectService, let's try and
// modify the index time stamp using the local EntityManager:
DvObject dvObjectToModify = em.find(DvObject.class, dsId);
DvObject dvObjectToModify = em.find(DvObject.class, docs.getDatasetId());
dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime()));
dvObjectToModify = em.merge(dvObjectToModify);
dvObjectToModify = null;

return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
return docs.getMessage();
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package edu.harvard.iq.dataverse.search;

import java.util.Collection;

import org.apache.solr.common.SolrInputDocument;

/**
 * Read-only bundle of a collection of {@link SolrInputDocument}s together with
 * a message string and a dataset id.
 *
 * <p>All three values are supplied once through the constructor and are never
 * reassigned afterwards. The document collection is stored and returned by
 * reference; no defensive copy is made, so changes to the underlying
 * collection remain visible through {@link #getDocuments()}.
 */
public class SolrInputDocuments {
    /** The Solr documents carried by this bundle (held by reference). */
    private final Collection<SolrInputDocument> documents;
    /** Message text associated with the documents. */
    private final String message;
    /** Id of the dataset the documents belong to. */
    private final Long datasetId;

    /**
     * Creates a bundle from the given values.
     *
     * @param documents the Solr documents; stored as-is, not copied
     * @param message   the message associated with the documents
     * @param datasetId the id of the dataset the documents belong to
     */
    public SolrInputDocuments(Collection<SolrInputDocument> documents, String message, Long datasetId) {
        this.documents = documents;
        this.message = message;
        this.datasetId = datasetId;
    }

    /**
     * @return the same collection instance that was passed to the constructor
     */
    public Collection<SolrInputDocument> getDocuments() {
        return documents;
    }

    /**
     * @return the message passed to the constructor
     */
    public String getMessage() {
        return message;
    }

    /**
     * @return the dataset id passed to the constructor
     */
    public Long getDatasetId() {
        return datasetId;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package edu.harvard.iq.dataverse.search;

import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import java.util.logging.Logger;
import java.util.stream.Collectors;

import org.apache.solr.client.solrj.SolrServerException;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;

import edu.harvard.iq.dataverse.ControlledVocabularyValue;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.Dataverse.DataverseType;
import edu.harvard.iq.dataverse.DataverseServiceBean;
import edu.harvard.iq.dataverse.GlobalId;
import edu.harvard.iq.dataverse.MetadataBlock;
import edu.harvard.iq.dataverse.branding.BrandingUtil;
import edu.harvard.iq.dataverse.mocks.MocksFactory;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;

/**
 * Exercises {@code IndexServiceBean.toSolrDocs} against a mock dataset that
 * carries a single controlled-vocabulary field named {@code "language"}.
 */
public class IndexServiceBeanTest {
    private static final Logger logger = Logger.getLogger(IndexServiceBeanTest.class.getCanonicalName());

    private IndexServiceBean indexService;
    private Dataverse dataverse;

    /**
     * Builds an {@code IndexServiceBean} wired with a real {@code SystemConfig}
     * and Mockito mocks for its other services, and stubs the root dataverse
     * lookup to return the mock dataverse created here.
     */
    @Before
    public void setUp() {
        dataverse = MocksFactory.makeDataverse();
        dataverse.setDataverseType(DataverseType.UNCATEGORIZED);

        final IndexServiceBean bean = new IndexServiceBean();
        bean.systemConfig = new SystemConfig();

        final SettingsServiceBean settingsMock = Mockito.mock(SettingsServiceBean.class);
        final DataverseServiceBean dataverseMock = Mockito.mock(DataverseServiceBean.class);
        bean.settingsService = settingsMock;
        bean.dataverseService = dataverseMock;
        bean.datasetFieldService = Mockito.mock(DatasetFieldServiceBean.class);

        BrandingUtil.injectServices(dataverseMock, settingsMock);
        Mockito.when(dataverseMock.findRootDataverse()).thenReturn(dataverse);

        indexService = bean;
    }

    /**
     * Converts the mock dataset to Solr documents and checks that at least one
     * document is produced and that a field named {@code "language"} appears
     * among the indexed field names.
     */
    @Test
    public void TestIndexing() throws SolrServerException, IOException {
        final SolrInputDocuments result = indexService.toSolrDocs(createIndexableDataset(), null);

        // Union of the field names over every produced document.
        final Set<String> fieldNames = result.getDocuments()
                .stream()
                .flatMap(document -> document.getFieldNames().stream())
                .collect(Collectors.toSet());

        logger.info(result.getMessage());
        logger.info(String.join(", ", fieldNames));

        assertTrue(!result.getDocuments().isEmpty());
        assertTrue(fieldNames.contains("language"));
    }

    /**
     * Creates a mock dataset with a fake DOI, adds a single-valued
     * {@code "language"} controlled-vocabulary field to its create version,
     * and wraps that version in an {@code IndexableDataset} owned by the test
     * dataverse.
     */
    private IndexableDataset createIndexableDataset() {
        final Dataset mockDataset = MocksFactory.makeDataset();
        mockDataset.setGlobalId(new GlobalId("doi:10.666/FAKE/fake"));

        final DatasetVersion version = mockDataset.getCreateVersion(null);
        final DatasetField languageField = createCVVField("language", "English", false);
        version.getDatasetFields().add(languageField);

        final IndexableDataset indexable = new IndexableDataset(version);
        indexable.getDatasetVersion().getDataset().setOwner(dataverse);
        return indexable;
    }

    /**
     * Builds a {@code DatasetField} whose type is a TEXT field with exactly one
     * controlled-vocabulary value, and selects that value on the field.
     *
     * @param name             name of the dataset field type
     * @param strValue         string value of the controlled-vocabulary entry
     * @param isAllowMultiples whether the field type allows multiple values
     * @return the populated dataset field
     */
    public static DatasetField createCVVField(String name, String strValue, boolean isAllowMultiples) {
        final DatasetFieldType fieldType = new DatasetFieldType(name, DatasetFieldType.FieldType.TEXT, isAllowMultiples);
        // Ids are drawn in a fixed order: vocabulary value, field type, then field.
        final ControlledVocabularyValue vocabValue =
                new ControlledVocabularyValue(MocksFactory.nextId(), strValue, fieldType);
        fieldType.setControlledVocabularyValues(Arrays.asList(vocabValue));
        fieldType.setId(MocksFactory.nextId());
        fieldType.setMetadataBlock(new MetadataBlock());

        final DatasetField result = new DatasetField();
        result.setId(MocksFactory.nextId());
        result.setSingleControlledVocabularyValue(vocabValue);
        result.setDatasetFieldType(fieldType);
        return result;
    }
}