Skip to content

Commit

Permalink
Merge pull request #10710 from QualitativeDataRepository/QDR-improve_…
Browse files Browse the repository at this point in the history
…index_status_speed

IQSS/10709 - Improve index/status and /clear-orphan speed
  • Loading branch information
landreev committed Sep 19, 2024
2 parents 1fffec4 + b569fdf commit d4e9a4f
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 61 deletions.
12 changes: 12 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -1405,4 +1405,16 @@ public UploadSessionQuotaLimit getUploadSessionQuotaLimit(DvObjectContainer pare

return new UploadSessionQuotaLimit(quota.getAllocation(), currentSize);
}

/**
 * Reports whether the given file has a FileMetadata entry in a RELEASED
 * version of its owning dataset.
 *
 * The subquery selects one RELEASED DatasetVersion of the file's owner,
 * ordered by versionNumber and then minorVersionNumber, both descending; the
 * outer query looks for a FileMetadata that links that version to the file.
 *
 * NOTE(review): "LIMIT 1" inside a JPQL subquery is not part of standard
 * JPQL - confirm that the persistence provider in use accepts it.
 *
 * @param id database id of the DataFile to check
 * @return true if at least one matching FileMetadata exists; false if none
 *         exists or if executing the query fails
 */
public boolean isInReleasedVersion(Long id) {
    Query query = em.createQuery("SELECT fm.id FROM FileMetadata fm, DvObject dvo WHERE fm.datasetVersion.id=(SELECT dv.id FROM DatasetVersion dv WHERE dv.dataset.id=dvo.owner.id and dv.versionState=edu.harvard.iq.dataverse.DatasetVersion.VersionState.RELEASED ORDER BY dv.versionNumber DESC, dv.minorVersionNumber DESC LIMIT 1) AND dvo.id=fm.dataFile.id AND fm.dataFile.id=:fid");
    query.setParameter("fid", id);
    // Only existence matters, so a single row is enough.
    query.setMaxResults(1);

    try {
        // getResultList() signals "no match" with an empty list instead of an
        // exception, and more than one match can no longer be mistaken for
        // "not found" the way a NonUniqueResultException was.
        return !query.getResultList().isEmpty();
    } catch (Exception ex) {
        // Deliberately best-effort, as before: any failure while running the
        // query is reported as "not in a released version".
        return false;
    }
}
}
12 changes: 12 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -1071,4 +1071,16 @@ public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Lo
}
}

/**
 * Reads the version state of every version of a dataset directly from the
 * datasetversion table with a native query.
 *
 * @param id database id of the dataset
 * @return the versionState column values ordered by ascending version row
 *         id (possibly empty), or null if the lookup fails
 */
public List<String> getVersionStates(long id) {
    try {
        Query query = em.createNativeQuery("SELECT dv.versionState FROM datasetversion dv WHERE dataset_id=? ORDER BY id");
        query.setParameter(1, id);
        // The untyped native query returns a raw List; the single selected
        // column is treated as String, exactly as the original cast did.
        @SuppressWarnings("unchecked")
        List<String> states = (List<String>) query.getResultList();
        return states;

    } catch (Exception ex) {
        // Logger.log(Level, String, Throwable) performs no parameter
        // substitution, so the message carries no "{0}" placeholder; the
        // exception itself is attached to the log record.
        logger.log(Level.WARNING, "exception trying to get versionstates of dataset " + id, ex);
        return null;
    }
}

}
2 changes: 0 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/DvObject.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
query = "SELECT o FROM DvObject o ORDER BY o.id"),
@NamedQuery(name = "DvObject.findById",
query = "SELECT o FROM DvObject o WHERE o.id=:id"),
@NamedQuery(name = "DvObject.checkExists",
query = "SELECT count(o) from DvObject o WHERE o.id=:id"),
@NamedQuery(name = "DvObject.ownedObjectsById",
query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"),
@NamedQuery(name = "DvObject.findByGlobalId",
Expand Down
15 changes: 9 additions & 6 deletions src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,15 @@ public List<DvObject> findByAuthenticatedUserId(AuthenticatedUser user) {
return query.getResultList();
}

/**
 * Tells whether a DvObject with the given id is present, by running the
 * "DvObject.checkExists" named count query and testing the count.
 *
 * @param id database id to look for
 * @return true when the count returned by the query is greater than zero
 */
public boolean checkExists(Long id) {
    Object matchCount = em.createNamedQuery("DvObject.checkExists")
            .setParameter("id", id)
            .getSingleResult();
    return ((Long) matchCount) > 0;
}
/**
 * Fetches the dtype column of the dvobject row with the given id using a
 * native query.
 *
 * @param id database id of the dvobject row
 * @return the dtype value, or null when the query finds no row for that id
 */
public String getDtype(Long id) {
    Query dtypeLookup = em.createNativeQuery("SELECT dvo.dtype FROM dvobject dvo WHERE dvo.id=?");
    dtypeLookup.setParameter(1, id);

    String dtype;
    try {
        dtype = (String) dtypeLookup.getSingleResult();
    } catch (NoResultException noSuchRow) {
        // No row with this id: report that as null rather than as an error.
        dtype = null;
    }
    return dtype;
}

public DvObject findByGlobalId(String globalIdString, DvObject.DType dtype) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,35 +50,31 @@ public class IndexBatchServiceBean {
/**
 * Builds a report comparing what is in the database with what is in the
 * index, in four sections: content and permissions that are in the database
 * but stale in or missing from the index, and content and permissions that
 * are in the index but not in the database.
 *
 * If any of the four lookups fails, the report instead contains a single
 * "SearchException " entry holding an explanatory message.
 *
 * @return an AsyncResult wrapping the JSON builder for the report
 */
public Future<JsonObjectBuilder> indexStatus() {
    JsonObjectBuilder response = Json.createObjectBuilder();
    logger.info("Beginning indexStatus()");
    try {
        JsonObject contentInDatabaseButStaleInOrMissingFromSolr = getContentInDatabaseButStaleInOrMissingFromSolr().build();
        JsonObject contentInSolrButNotDatabase = getContentInSolrButNotDatabase().build();
        JsonObject permissionsInSolrButNotDatabase = getPermissionsInSolrButNotDatabase().build();
        JsonObject permissionsInDatabaseButStaleInOrMissingFromSolr = getPermissionsInDatabaseButStaleInOrMissingFromSolr().build();

        response
                .add("contentInDatabaseButStaleInOrMissingFromIndex", contentInDatabaseButStaleInOrMissingFromSolr)
                .add("contentInIndexButNotDatabase", contentInSolrButNotDatabase)
                .add("permissionsInDatabaseButStaleInOrMissingFromIndex", permissionsInDatabaseButStaleInOrMissingFromSolr)
                .add("permissionsInIndexButNotDatabase", permissionsInSolrButNotDatabase);

        logger.log(Level.INFO, "contentInDatabaseButStaleInOrMissingFromIndex: {0}", contentInDatabaseButStaleInOrMissingFromSolr);
        logger.log(Level.INFO, "contentInIndexButNotDatabase: {0}", contentInSolrButNotDatabase);
        logger.log(Level.INFO, "permissionsInDatabaseButStaleInOrMissingFromIndex: {0}", permissionsInDatabaseButStaleInOrMissingFromSolr);
        logger.log(Level.INFO, "permissionsInIndexButNotDatabase: {0}", permissionsInSolrButNotDatabase);
    } catch (Exception ex) {
        // Any Exception is caught here, and not every exception carries a
        // cause; fall back to the exception itself so that building the
        // message cannot throw a NullPointerException.
        Throwable root = (ex.getCause() != null) ? ex.getCause() : ex;
        String msg = "Can not determine index status. " + ex.getLocalizedMessage() + ". Is Solr down? Exception: " + root.getLocalizedMessage();
        // One log record carrying both the message and the stack trace.
        logger.log(Level.WARNING, msg, ex);
        response.add("SearchException ", msg);
    }

    return new AsyncResult<>(response);
}

@Asynchronous
public Future<JsonObjectBuilder> clearOrphans() {
JsonObjectBuilder response = Json.createObjectBuilder();
Expand Down
68 changes: 38 additions & 30 deletions src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
import edu.harvard.iq.dataverse.DvObject.DType;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean;
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
Expand Down Expand Up @@ -2210,62 +2211,66 @@ public List<String> findFilesInSolrOnly() throws SearchException {
* @throws SearchException
*/
public List<String> findPermissionsInSolrOnly() throws SearchException {
logger.info("Checking for solr-only permissions");
List<String> permissionInSolrOnly = new ArrayList<>();
try {
int rows = 100;
int rows = 1000;
SolrQuery q = (new SolrQuery(SearchFields.DEFINITION_POINT_DVOBJECT_ID+":*")).setRows(rows).setSort(SortClause.asc(SearchFields.ID));
String cursorMark = CursorMarkParams.CURSOR_MARK_START;
boolean done = false;
while (!done) {
q.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
QueryResponse rsp = solrServer.query(q);
String nextCursorMark = rsp.getNextCursorMark();
logger.fine("Next cursor mark (1K entries): " + nextCursorMark);
SolrDocumentList list = rsp.getResults();
for (SolrDocument doc: list) {
long id = Long.parseLong((String) doc.getFieldValue(SearchFields.DEFINITION_POINT_DVOBJECT_ID));
String docId = (String)doc.getFieldValue(SearchFields.ID);
if(!dvObjectService.checkExists(id)) {
String docId = (String) doc.getFieldValue(SearchFields.ID);
String dtype = dvObjectService.getDtype(id);
if (dtype == null) {
permissionInSolrOnly.add(docId);
} else {
DvObject obj = dvObjectService.findDvObject(id);
if (obj instanceof Dataset d) {
DatasetVersion dv = d.getLatestVersion();
}
if (dtype.equals(DType.Dataset.getDType())) {
List<String> states = datasetService.getVersionStates(id);
if (states != null) {
String latestState = states.get(states.size() - 1);
if (docId.endsWith("draft_permission")) {
if (!dv.isDraft()) {
if (!latestState.equals(VersionState.DRAFT.toString())) {
permissionInSolrOnly.add(docId);
}
} else if (docId.endsWith("deaccessioned_permission")) {
if (!dv.isDeaccessioned()) {
if (!latestState.equals(VersionState.DEACCESSIONED.toString())) {
permissionInSolrOnly.add(docId);
}
} else {
if (d.getReleasedVersion() == null) {
if (!states.contains(VersionState.RELEASED.toString())) {
permissionInSolrOnly.add(docId);
}
}
} else if (obj instanceof DataFile f) {
List<VersionState> states = dataFileService.findVersionStates(f.getId());
Set<String> strings = states.stream().map(VersionState::toString).collect(Collectors.toSet());
logger.fine("States for " + docId + ": " + String.join(", ", strings));
if (docId.endsWith("draft_permission")) {
if (!states.contains(VersionState.DRAFT)) {
permissionInSolrOnly.add(docId);
}
} else if (docId.endsWith("deaccessioned_permission")) {
if (!states.contains(VersionState.DEACCESSIONED) && states.size() == 1) {
permissionInSolrOnly.add(docId);
}
}
} else if (dtype.equals(DType.DataFile.getDType())) {
List<VersionState> states = dataFileService.findVersionStates(id);
Set<String> strings = states.stream().map(VersionState::toString).collect(Collectors.toSet());
logger.fine("States for " + docId + ": " + String.join(", ", strings));
if (docId.endsWith("draft_permission")) {
if (!states.contains(VersionState.DRAFT)) {
permissionInSolrOnly.add(docId);
}
} else if (docId.endsWith("deaccessioned_permission")) {
if (!states.contains(VersionState.DEACCESSIONED) && states.size() == 1) {
permissionInSolrOnly.add(docId);
}
} else {
if (!states.contains(VersionState.RELEASED)) {
permissionInSolrOnly.add(docId);
} else {
if (!states.contains(VersionState.RELEASED)) {
if (!dataFileService.isInReleasedVersion(id)) {
logger.fine("Adding doc " + docId + " to list of permissions in Solr only");
permissionInSolrOnly.add(docId);
} else {
if(dataFileService.findFileMetadataByDatasetVersionIdAndDataFileId(f.getOwner().getReleasedVersion().getId(), f.getId()) == null) {
logger.fine("Adding doc " + docId + " to list of permissions in Solr only");
permissionInSolrOnly.add(docId);
}
}

}

}
}
}
Expand All @@ -2277,6 +2282,9 @@ public List<String> findPermissionsInSolrOnly() throws SearchException {
} catch (SolrServerException | IOException ex) {
throw new SearchException("Error searching Solr for permissions" , ex);

} catch (Exception e) {
logger.warning(e.getLocalizedMessage());
e.printStackTrace();
}
return permissionInSolrOnly;
}
Expand Down Expand Up @@ -2307,7 +2315,7 @@ private List<String> findDvObjectInSolrOnly(String type) throws SearchException
if (idObject != null) {
try {
long id = (Long) idObject;
if (!dvObjectService.checkExists(id)) {
if (dvObjectService.getDtype(id) == null) {
dvObjectInSolrOnly.add((String)doc.getFieldValue(SearchFields.ID));
}
} catch (ClassCastException ex) {
Expand Down

0 comments on commit d4e9a4f

Please sign in to comment.