diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index bd4aa27ba68..2afb5d26082 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -13,6 +13,7 @@ import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.search.IndexableDataset.DatasetState; import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -476,12 +477,8 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr * @todo should we use solrDocIdentifierDataset or * IndexableObject.IndexableTypes.DATASET.getName() + "_" ? 
*/ - // String solrIdPublished = solrDocIdentifierDataset + dataset.getId(); String solrIdPublished = determinePublishedDatasetSolrDocId(dataset); String solrIdDraftDataset = IndexableObject.IndexableTypes.DATASET.getName() + "_" + dataset.getId() + IndexableDataset.DatasetState.WORKING_COPY.getSuffix(); - // String solrIdDeaccessioned = IndexableObject.IndexableTypes.DATASET.getName() - // + "_" + dataset.getId() + - // IndexableDataset.DatasetState.DEACCESSIONED.getSuffix(); String solrIdDeaccessioned = determineDeaccessionedDatasetId(dataset); StringBuilder debug = new StringBuilder(); debug.append("\ndebug:\n"); @@ -494,112 +491,53 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr atLeastOnePublishedVersion = true; } List solrIdsOfDocsToDelete = null; - - try { - solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); - logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); - //We keep the latest version's docs unless it is deaccessioned and there is no published/released version - //So skip the loop removing those docs from the delete list except in that case - if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { - List latestFileMetadatas = latestVersion.getFileMetadatas(); - String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); - for (FileMetadata fileMetadata : latestFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix; - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (logger.isLoggable(Level.FINE)) { + writeDebugInfo(debug, dataset); + } + if (doNormalSolrDocCleanUp) { + try { + solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); + logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); + // We keep the latest version's docs unless it is deaccessioned and there is no + // published/released version + // So skip the loop 
removing those docs from the delete list except in that case + if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { + List latestFileMetadatas = latestVersion.getFileMetadatas(); + String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); + for (FileMetadata fileMetadata : latestFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + + suffix; + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } - } - if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { - List releasedFileMetadatas = releasedVersion.getFileMetadatas(); - for (FileMetadata fileMetadata : releasedFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { + List releasedFileMetadatas = releasedVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : releasedFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } - } - //Clear any unused dataset docs - if (!latestVersion.isDraft()) { - // The latest version is released, so should delete any draft docs for the - // dataset - solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId() + draftSuffix); - } - if (!atLeastOnePublishedVersion) { - // There's no released version, so should delete any normal state docs for the - // dataset - solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId()); - } - if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { - // There's a released version or a draft, so should delete any deaccessioned - // state docs for the dataset - solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId() + deaccessionedSuffix); - } - } catch (SearchException | 
NullPointerException ex) { - logger.fine("could not run search of files to delete: " + ex); - } - logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); - int numPublishedVersions = 0; - List versions = dataset.getVersions(); - //List solrIdsOfFilesToDelete = new ArrayList<>(); - if (logger.isLoggable(Level.FINE)) { - for (DatasetVersion datasetVersion : versions) { - Long versionDatabaseId = datasetVersion.getId(); - String versionTitle = datasetVersion.getTitle(); - String semanticVersion = datasetVersion.getSemanticVersion(); - DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); - if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { - numPublishedVersions += 1; + // Clear any unused dataset docs + if (!latestVersion.isDraft()) { + // The latest version is released, so should delete any draft docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdDraftDataset); } - debug.append("version found with database id " + versionDatabaseId + "\n"); - debug.append("- title: " + versionTitle + "\n"); - debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); - List fileInfo = new ArrayList<>(); - List fileMetadatas = datasetVersion.getFileMetadatas(); - - for (FileMetadata fileMetadata : fileMetadatas) { - /** - * It sounds weird but the first thing we'll do is preemptively delete the Solr - * documents of all published files. Don't worry, published files will be - * re-indexed later along with the dataset. We do this so users can delete files - * from published versions of datasets and then re-publish a new version without - * fear that their old published files (now deleted from the latest published - * version) will be searchable. 
See also - * https://github.com/IQSS/dataverse/issues/762 - */ - fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); + if (!atLeastOnePublishedVersion) { + // There's no released version, so should delete any normal state docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdPublished); } -// try { - /** - * Preemptively delete *all* Solr documents for files associated with the - * dataset based on a Solr query. - * - * We must query Solr for this information because the file has been deleted - * from the database ( perhaps when Solr was down, as reported in - * https://github.com/IQSS/dataverse/issues/2086 ) so the database doesn't even - * know about the file. It's an orphan. - * - * @todo This Solr query should make the iteration above based on the database - * unnecessary because it the Solr query should find all files for the - * dataset. We can probably remove the iteration above after an "index - * all" has been performed. Without an "index all" we won't be able to - * find files based on parentId because that field wasn't searchable in - * 4.0. - * - * @todo We should also delete the corresponding Solr "permission" documents for - * the files. 
- */ - // List allFilesForDataset = findFilesOfParentDataset(dataset.getId()); - // solrIdsOfFilesToDelete.addAll(allFilesForDataset); -// } catch (SearchException | NullPointerException ex) { -// logger.fine("could not run search of files to delete: " + ex); -// } - int numFiles = 0; - if (fileMetadatas != null) { - numFiles = fileMetadatas.size(); + if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { + // There's a released version or a draft, so should delete any deaccessioned + // state docs for the dataset + solrIdsOfDocsToDelete.add(solrIdDeaccessioned); } - debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); + } catch (SearchException | NullPointerException ex) { + logger.fine("could not run search of files to delete: " + ex); } - } - debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); - if (doNormalSolrDocCleanUp) { - + logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + if(!solrIdsOfDocsToDelete.isEmpty()) { List solrIdsOfPermissionDocsToDelete = new ArrayList<>(); for(String file: solrIdsOfDocsToDelete) { @@ -636,19 +574,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { - String deleteDeaccessionedResult = removeDeaccessioned(dataset); - results.append("Draft exists, no need for deaccessioned version. Deletion attempted for ") - .append(solrIdDeaccessioned).append(" (and files). Result: ") - .append(deleteDeaccessionedResult).append("\n"); - } - desiredCards.put(DatasetVersion.VersionState.RELEASED, false); - if (doNormalSolrDocCleanUp) { - String deletePublishedResults = removePublished(dataset); - results.append("No published version. Attempting to delete traces of published version from index. 
Result: ") - .append(deletePublishedResults).append("\n"); - } /** * Desired state for existence of cards: {DRAFT=true, @@ -687,19 +613,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("No draft version. Attempting to index as deaccessioned. Result: ").append(indexDeaccessionedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.RELEASED, false); - if (doNormalSolrDocCleanUp) { - String deletePublishedResults = removePublished(dataset); - results.append("No published version. Attempting to delete traces of published version from index. Result: ").append(deletePublishedResults).append("\n"); - } - desiredCards.put(DatasetVersion.VersionState.DRAFT, false); - if (doNormalSolrDocCleanUp) { - //List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); - String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); - //String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); - //results.append("Attempting to delete traces of drafts. Result: ") - // .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); - } /** * Desired state for existence of cards: {DEACCESSIONED=true, @@ -741,20 +655,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("Attempted to index " + solrIdPublished).append(". Result: ").append(indexReleasedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DRAFT, false); - if (doNormalSolrDocCleanUp) { - //List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); - String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); - //String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); - //results.append("The latest version is published. Attempting to delete drafts. 
Result: ") - // .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); - } - desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { - String deleteDeaccessionedResult = removeDeaccessioned(dataset); - results.append("No need for deaccessioned version. Deletion attempted for ") - .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); - } /** * Desired state for existence of cards: {RELEASED=true, @@ -801,11 +702,6 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(solrIdDraftDataset).append(" (limited visibility). Result: ").append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { - String deleteDeaccessionedResult = removeDeaccessioned(dataset); - results.append("No need for deaccessioned version. Deletion attempted for ") - .append(solrIdDeaccessioned).append(". 
Result: ").append(deleteDeaccessionedResult); - } /** * Desired state for existence of cards: {DRAFT=true, @@ -843,7 +739,45 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } } -/* private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { + private void writeDebugInfo(StringBuilder debug, Dataset dataset) { + List versions = dataset.getVersions(); + int numPublishedVersions = 0; + for (DatasetVersion datasetVersion : versions) { + Long versionDatabaseId = datasetVersion.getId(); + String versionTitle = datasetVersion.getTitle(); + String semanticVersion = datasetVersion.getSemanticVersion(); + DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); + if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { + numPublishedVersions += 1; + } + debug.append("version found with database id " + versionDatabaseId + "\n"); + debug.append("- title: " + versionTitle + "\n"); + debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); + List fileInfo = new ArrayList<>(); + List fileMetadatas = datasetVersion.getFileMetadatas(); + + for (FileMetadata fileMetadata : fileMetadatas) { + /** + * Collects "id:label" for each file of this version for the "- files:" debug line + * below; nothing is deleted here. NOTE(review): fileMetadatas is iterated before + * the null check that follows this loop, so that check cannot guard the loop. + * Historical wording of this comment: Solr documents of all published files are + * preemptively deleted and re-indexed with the dataset, so that files removed + * from a newer published version do not remain searchable. 
See also + * https://github.com/IQSS/dataverse/issues/762 + */ + fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); + } + int numFiles = 0; + if (fileMetadatas != null) { + numFiles = fileMetadatas.size(); + } + debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); + } + debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); + } + + /* private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { String deleteDraftFilesResults = ""; IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); deleteDraftFilesResults = indexResponse.toString(); @@ -925,15 +859,17 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED); -// String deleteFilesResult = removeMultipleSolrDocs(docIds); -// result.append(deleteFilesResult); - return result.toString(); - } - - private String removePublished(Dataset dataset) { - StringBuilder result = new StringBuilder(); - String deletePublishedResult = removeSolrDocFromIndex(determinePublishedDatasetSolrDocId(dataset)); - result.append(deletePublishedResult); -// List docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED); -// String deleteFilesResult = removeMultipleSolrDocs(docIds); -// result.append(deleteFilesResult); - return result.toString(); - } - private Dataverse findRootDataverseCached() { if (true) { /**