Skip to content

Commit

Permalink
treat paths in history based reindex correctly
Browse files Browse the repository at this point in the history
fixes #4317
  • Loading branch information
vladak committed Feb 19, 2024
1 parent b753f02 commit 520ce3b
Show file tree
Hide file tree
Showing 8 changed files with 507 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
*/

/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
*/
package org.opengrok.indexer.history;

import org.jetbrains.annotations.VisibleForTesting;

import java.util.Collection;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.HashSet;
import java.util.Set;

/**
* This class is meant to collect files that were touched in some way by SCM update.
Expand All @@ -36,14 +36,11 @@
* in one changeset a file may be deleted, only to be re-added in the next changeset etc.
*/
public class FileCollector extends ChangesetVisitor {
private final SortedSet<String> files;
private final Set<String> files;

/**
* Assumes comparing in the same way as {@code org.opengrok.indexer.index.IndexDatabase#FILENAME_COMPARATOR}.
*/
public FileCollector(boolean consumeMergeChangesets) {
super(consumeMergeChangesets);
files = new TreeSet<>();
files = new HashSet<>();
}

public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) {
Expand All @@ -58,7 +55,10 @@ public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) {
}
}

public SortedSet<String> getFiles() {
/**
* @return set of file paths relative to source root. There are no guarantees w.r.t. ordering.
*/
public Set<String> getFiles() {
return files;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ private void checkProjectsParallel(IndexCheckMode mode, Set<String> projectNames
// It will be thrown once all the checks complete.
LOGGER.log(Level.WARNING, "could not perform index check", exception);
ioException = (IOException) exception;
} else {
throw new IndexCheckException(exception);
}
}
}
Expand Down Expand Up @@ -515,19 +517,30 @@ static List<Path> getLiveDocumentPaths(Path indexPath) throws IOException {

Bits liveDocs = MultiBits.getLiveDocs(indexReader);

LOGGER.log(Level.FINEST, "maxDoc = {0}", indexReader.maxDoc());
for (int i = 0; i < indexReader.maxDoc(); i++) {
Document doc = indexReader.storedFields().document(i);

// liveDocs is null if the index has no deletions.
if (liveDocs != null && !liveDocs.get(i)) {
IndexableField field = doc.getField(QueryBuilder.U);
if (field != null) {
String uidString = field.stringValue();
LOGGER.log(Level.FINEST, "ignoring ''{0}'' at {1}",
new Object[]{Util.uid2url(uidString), Util.uid2date(uidString)});
} else {
LOGGER.log(Level.FINEST, "ignoring {0}", doc);
}
continue;
}

// This should avoid the special LOC documents.
IndexableField field = doc.getField(QueryBuilder.U);
if (field != null) {
String uid = field.stringValue();
livePaths.add(Path.of(Util.uid2url(uid)));
String uidString = field.stringValue();
LOGGER.log(Level.FINEST, "live doc: ''{0}'' at {1}",
new Object[]{Util.uid2url(uidString), Util.uid2date(uidString)});
livePaths.add(Path.of(Util.uid2url(uidString)));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ public IndexCheckException(String message, Set<Path> paths) {
failedPaths.addAll(paths);
}

/**
 * Construct an instance that wraps the given exception as its cause.
 * Unlike the path-based constructors, this one does not record any
 * failed source paths (callers such as the indexer's main() fall back
 * to printing the cause when the failed-path set is empty).
 * @param e underlying exception that caused the index check to fail
 */
public IndexCheckException(Exception e) {
    super(e);
}

/**
* @return set of source paths which failed the check
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,24 @@ public class IndexDatabase {

private static final Logger LOGGER = LoggerFactory.getLogger(IndexDatabase.class);

private static final Comparator<File> FILENAME_COMPARATOR = Comparator.comparing(File::getName);
@VisibleForTesting
static final Comparator<File> FILENAME_COMPARATOR = Comparator.comparing(File::getName);

/**
 * Orders {@link Path} instances by comparing their name components one by one,
 * lexicographically. When one path is a component-wise prefix of the other,
 * the shorter path sorts first. This matches the per-component ordering used
 * for history based reindex so that index uid traversal and file traversal agree.
 */
@VisibleForTesting
static final Comparator<Path> FILEPATH_COMPARATOR = (left, right) -> {
    // Walk the components both paths have in common; the first difference decides.
    final int shared = Math.min(left.getNameCount(), right.getNameCount());
    for (int idx = 0; idx < shared; idx++) {
        final int cmp = left.getName(idx).toString().compareTo(right.getName(idx).toString());
        if (cmp != 0) {
            return cmp;
        }
    }

    // All shared components are equal, so the path with fewer components (the prefix) wins.
    return Integer.compare(left.getNameCount(), right.getNameCount());
};

private static final Set<String> CHECK_FIELDS;

Expand Down Expand Up @@ -197,6 +214,22 @@ public IndexDatabase() throws IOException {
this(null);
}

/**
 * Testing-only constructor that injects a pre-positioned uid iterator and
 * an index writer instead of letting {@link #initialize()} create them from scratch.
 * Anyone using this constructor is supposed to never call {@link #update()}.
 * Do not use for anything besides testing.
 * @param project project this index database belongs to (forwarded to the main constructor)
 * @param uidIter uid iterator
 * @param writer index writer
 * @throws IOException on error
 */
@VisibleForTesting
IndexDatabase(Project project, TermsEnum uidIter, IndexWriter writer) throws IOException {
    this(project, new IndexDownArgsFactory());
    this.uidIter = uidIter;
    this.writer = writer;
    this.completer = new PendingFileCompleter();
    initialize();
}

/**
* Create a new instance of an Index Database for a given project.
*
Expand Down Expand Up @@ -709,8 +742,7 @@ public void update() throws IOException {
if (stat == TermsEnum.SeekStatus.END) {
uidIter = null;
LOGGER.log(Level.WARNING,
"Couldn''t find a start term for {0}, empty u field?",
startUid);
"Couldn''t find a start term for {0}, empty u field?", startUid);
}
}

Expand Down Expand Up @@ -819,19 +851,25 @@ private void setupDeletedUids() throws IOException {
Statistics stat = new Statistics();
LOGGER.log(Level.FINEST, "traversing the documents in {0} to collect uids of deleted documents",
indexDirectory);
StoredFields storedFields = reader.storedFields();
for (int i = 0; i < reader.maxDoc(); i++) {
Document doc = storedFields.document(i, LIVE_CHECK_FIELDS); // use limited-field version
IndexableField field = doc.getField(QueryBuilder.U);
if (!liveDocs.get(i)) {
StoredFields storedFields = reader.storedFields();
Document doc = storedFields.document(i, LIVE_CHECK_FIELDS); // use limited-field version
IndexableField field = doc.getField(QueryBuilder.U);
if (field != null) {
if (LOGGER.isLoggable(Level.FINEST)) {
String uidString = field.stringValue();
LOGGER.log(Level.FINEST, "adding ''{0}'' at {1} to deleted uid set",
new Object[]{Util.uid2url(uidString), Util.uid2date(uidString)});
LOGGER.log(Level.FINEST, "adding ''{0}'' ({2}) at {1} to deleted uid set",
new Object[]{Util.uid2url(uidString), Util.uid2date(uidString), i});
}
deletedUids.add(field.stringValue());
}
} else {
if (field != null) {
String uidString = field.stringValue();
LOGGER.log(Level.FINEST, "live doc: ''{0}'' ({2}) at {1}",
new Object[]{Util.uid2url(uidString), Util.uid2date(uidString), i});
}
}
}
stat.report(LOGGER, Level.FINEST, String.format("found %s deleted documents in %s",
Expand Down Expand Up @@ -931,12 +969,17 @@ void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOExcepti

try (Progress progress = new Progress(LOGGER, String.format("collecting files for %s", project),
fileCollector.getFiles().size())) {
for (String path : fileCollector.getFiles()) {
List<Path> paths = fileCollector.getFiles().stream().
map(Path::of).
sorted(FILEPATH_COMPARATOR).
collect(Collectors.toList());
LOGGER.log(Level.FINEST, "collected sorted files: {0}", paths);
for (Path path : paths) {
if (isInterrupted()) {
return;
}
File file = new File(sourceRoot, path);
processFileHistoryBased(args, file, path);
File file = new File(sourceRoot, path.toString());
processFileHistoryBased(args, file, path.toString());
progress.increment();
}
}
Expand Down Expand Up @@ -1096,16 +1139,17 @@ private void removeAnnotationFile(String path) {
* and queue the removal of the associated xref file.
*
* @param removeHistory if false, do not remove history cache for this file
* @return deleted uid (as string)
* @throws java.io.IOException if an error occurs
*/
private void removeFile(boolean removeHistory) throws IOException {
private String removeFile(boolean removeHistory) throws IOException {
String path = Util.uid2url(uidIter.term().utf8ToString());

for (IndexChangedListener listener : listeners) {
listener.fileRemove(path);
}

removeFileDocUid(path);
String deletedUid = removeFileDocUid(path);

removeXrefFile(path);

Expand All @@ -1122,9 +1166,11 @@ private void removeFile(boolean removeHistory) throws IOException {
for (IndexChangedListener listener : listeners) {
listener.fileRemoved(path);
}

return deletedUid;
}

private void removeFileDocUid(String path) throws IOException {
private String removeFileDocUid(String path) throws IOException {

// Determine if a reversal of counts is necessary, and execute if so.
if (isCountingDeltas) {
Expand All @@ -1141,6 +1187,8 @@ private void removeFileDocUid(String path) throws IOException {
}

writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));

return uidIter.term().utf8ToString();
}

private void decrementLOCforDoc(String path, Document doc) {
Expand Down Expand Up @@ -1648,6 +1696,17 @@ void indexDown(File dir, String parent, IndexDownArgs args, Progress progress) t
}
}

/**
 * Wrapper for fatal errors encountered during indexing.
 * Extends {@link RuntimeException} so it is unchecked and can propagate
 * out of deep indexer call chains (e.g. when adding a document would
 * collide with a just-deleted document and corrupt the index).
 */
public static class IndexerFault extends RuntimeException {
    private static final long serialVersionUID = -1;

    /**
     * @param message description of the fatal indexing condition
     */
    public IndexerFault(String message) {
        super(message);
    }
}

/**
* Compared with {@link #processFile(IndexDownArgs, File, String)}, this method's file/path arguments
* represent files that have actually changed in some way, while the other method's argument represent
Expand All @@ -1660,12 +1719,14 @@ void indexDown(File dir, String parent, IndexDownArgs args, Progress progress) t
@VisibleForTesting
void processFileHistoryBased(IndexDownArgs args, File file, String path) throws IOException {
final boolean fileExists = file.exists();

final Set<String> deletedUidsHere = new HashSet<>();
path = Util.fixPathIfWindows(path);

// Traverse terms until reaching document beyond path of given file.
while (uidIter != null && uidIter.term() != null
&& uidIter.term().compareTo(emptyBR) != 0
&& Util.uid2url(uidIter.term().utf8ToString()).compareTo(path) <= 0) {
while (uidIter != null && uidIter.term() != null && uidIter.term().compareTo(emptyBR) != 0
&& FILEPATH_COMPARATOR.compare(
Path.of(Util.uid2url(uidIter.term().utf8ToString())),
Path.of(path)) <= 0) {

if (deletedUids.contains(uidIter.term().utf8ToString())) {
logIgnoredUid(uidIter.term().utf8ToString());
Expand All @@ -1688,9 +1749,10 @@ void processFileHistoryBased(IndexDownArgs args, File file, String path) throws
if (!matchOK) {
removeFile(false);
addWorkHistoryBased(args, termFile, termPath);
deletedUidsHere.add(removeFile(false));
}
} else {
removeFile(!fileExists);
deletedUidsHere.add(removeFile(!fileExists));
}

BytesRef next = uidIter.next();
Expand All @@ -1703,6 +1765,18 @@ void processFileHistoryBased(IndexDownArgs args, File file, String path) throws
// That said, it is necessary to check whether the file can be accepted. This is done in the function below.
// Also, allow for broken symbolic links (File.exists() returns false for these).
if (fileExists || Files.isSymbolicLink(file.toPath())) {
// This assumes that the last modified time is indeed what the indexer uses when adding the document.
String time = DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND);
if (deletedUidsHere.contains(Util.path2uid(path, time))) {
//
// Adding document with the same date of a pre-existing document which is being removed
// will lead to index corruption (duplicate documents). Hence, make the indexer to fail hard.
//
throw new IndexerFault(
String.format("attempting to add file '%s' with date matching deleted document: %s",
path, time));
}

addWorkHistoryBased(args, file, path);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,12 @@ public static void main(String[] argv) {
System.exit(2);
} catch (IndexCheckException e) {
System.err.printf("Index check failed%n");
System.err.print("You might want to remove " + e.getFailedPaths());
if (!e.getFailedPaths().isEmpty()) {
System.err.print("You might want to remove " + e.getFailedPaths());
} else {
System.err.println("with exception: " + e);
e.printStackTrace(System.err);
}
System.exit(1);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ private static void writeAnnotation(int num, Writer out, Annotation annotation,
* walk of the file hierarchy. Thus, null character (\u0000) is used both to
* separate directory components and to separate the path from the date.
*
* @param path path to mangle.
* @param path path to mangle. This is assumed to be relative to source root.
* @param date date string to use.
* @return the mangled path.
*/
Expand Down
Loading

0 comments on commit 520ce3b

Please sign in to comment.