Skip to content

Commit

Permalink
Optimize gitignore detection and file tree walking
Browse files Browse the repository at this point in the history
  • Loading branch information
Vincent Pizzo committed Jul 18, 2024
1 parent 13a2585 commit 2e34b31
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 84 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
*.iml
.vscode/
target/
dependency-reduced-pom.xml
dependency-reduced-pom.xml
.DS_Store
50 changes: 26 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,29 +72,30 @@ Process CODEOWNER files
-V, --version Print version information and exit.
Commands:
help Display help information about the specified command.
list Lists all files with the corresponding approvers
verify Verifies the format of the CODEOWNERS file
list Lists all files with the corresponding approvers.
verify Verifies the format of the CODEOWNERS file.
```

```shell
$ codeowners-cli list help

Usage: codeowners-cli list [-fu] [-idl] [-ngi] [-cf=<codeownersFile>]
[-gi=<gitignoreFile>] [-o=<owners>]... [<files>...]
[COMMAND]
Lists all files with the corresponding approvers
[<files>...] Specifies the files to scan
-cf, --codeowners-file=<codeownersFile>
Specify the path to the CODEOWNERS file.
-f, --fail-on-output Whether to exit non-zero if there are any matches.
-gi, --gitignore-file=<gitignoreFile>
Specify the path to the .gitignore file.
-idl, --ignore-dot-files
Whether to ignore the dot files.
-ngi, --no-gitignore Whether to ignore the .gitignore file.
-o, --owners=<owners> Filters the results by owner
-u, --unowned-files Whether to only show unowned files (can be
combined with -o).
Usage: codeowners-cli list [-fgu] [-c=<codeownersFile>] [-p=<basePath>]
[-o=<owners>]... <files>... [COMMAND]
Lists all files with the corresponding approvers.
<files>... Specifies the files to scan.
Default: ./
-c, --codeowners-file=<codeownersFile>
Specify the path to the CODEOWNERS file.
-f, --fail-on-matches Whether to exit non-zero if there are any matches.
-g, --git Indicates whether git should be used to find
.gitignore files. (git must be available on command
line).
-o, --owners=<owners> Filters the results by owner.
-p, --base-path=<basePath>
The projects base path (useful for when .gitignore is
located elsewhere).
-u, --unowned-files Whether to only show unowned files (can be combined
with -o).
Commands:
help Display help information about the specified command.
```
Expand Down Expand Up @@ -127,10 +128,11 @@ $ codeowners-cli list
```shell
codeowners-cli verify help

Usage: codeowners-cli verify [-cf=<codeownersFile>] [COMMAND]
Verifies the format of the CODEOWNERS file
-cf, --codeowners-file=<codeownersFile>
Usage: codeowners-cli verify [-c=<codeownersFile>] [COMMAND]
Verifies the format of the CODEOWNERS file.
-c, --codeowners-file=<codeownersFile>
Specify the path to the CODEOWNERS file.
Default: ./CODEOWNERS
Commands:
help Display help information about the specified command.
```
Expand All @@ -149,17 +151,17 @@ To use with Pre-commit, simply add the following to your `.pre-commit-config.yam
```yaml
- repo: https://github.com/vincentjames501/codeowners-cli
rev: v0.0.3
rev: v0.0.4
hooks:
- id: codeowners-cli
args: [ "list", "--unowned-files", "--fail-on-output" ]
args: [ "list", "--unowned-files", "--fail-on-matches" ]
```
### To prevent committing just invalid CODEOWNERS
```yaml
- repo: https://github.com/vincentjames501/codeowners-cli
rev: v0.0.3
rev: v0.0.4
hooks:
- id: codeowners-cli
args: [ "verify" ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,105 +15,171 @@
*/
package org.vincentjames501.codeowners.commands;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import nl.basjes.codeowners.CodeOwners;
import nl.basjes.gitignore.GitIgnore;
import nl.basjes.gitignore.GitIgnoreFileSet;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
import picocli.CommandLine.Parameters;

import static picocli.CommandLine.Help.Visibility.ALWAYS;

/**
* @author vincentjames501
* @version 0.0.1
* @since 2024-July-15
*/
@Command(name = "list", description = "Lists all files with the corresponding approvers", subcommands = CommandLine.HelpCommand.class)
@Command(name = "list", description = "Lists all files with the corresponding approvers.", subcommands = CommandLine.HelpCommand.class)
public class ListCodeOwners implements Callable<Integer> {
@Option(names = { "-cf", "--codeowners-file" }, description = "Specify the path to the CODEOWNERS file.")
Path codeownersFile = Paths.get("./CODEOWNERS");

@Option(names = { "-gi", "--gitignore-file" }, description = "Specify the path to the .gitignore file.")
Path gitignoreFile;

@Option(names = { "-ngi", "--no-gitignore" }, description = "Whether to ignore the .gitignore file.")
boolean noGitIgnore = false;

@Option(names = { "-idl", "--ignore-dot-files" }, description = "Whether to ignore the dot files.")
boolean ignoreDotFiles = true;
@Option(names = { "-c", "--codeowners-file" }, description = "Specify the path to the CODEOWNERS file.")
Path codeownersFile;

@Option(names = { "-u", "--unowned-files" }, description = "Whether to only show unowned files (can be combined with -o).")
boolean unownedFilesOnly = false;

@Option(names = { "-f", "--fail-on-output" }, description = "Whether to exit non-zero if there are any matches.")
boolean failOnOutput = false;
@Option(names = { "-f", "--fail-on-matches" }, description = "Whether to exit non-zero if there are any matches.")
boolean failOnMatches = false;

@Option(names = { "-o", "--owners" }, description = "Filters the results by owner")
Set<String> owners;
@Option(names = { "-g",
"--git" }, description = "Indicates whether git should be used to find .gitignore files. (git must be available on command line).", defaultValue = "true")
boolean useGit;

@Parameters(description = "Specifies the files to scan")
List<Path> files = List.of(Paths.get("./"));
@Option(names = { "-o", "--owners" }, description = "Filters the results by owner.")
Set<String> owners;

private static final Path DEFAULT_GIT_IGNORE_PATH = Paths.get("./.gitignore");
@Option(names = { "-p", "--base-path" }, description = "The projects base path (useful for when .gitignore is located elsewhere).", defaultValue = "./")
Path basePath;

@Parameters(description = "Specifies the files to scan.", defaultValue = "./", arity = "1..*", showDefaultValue = ALWAYS)
List<Path> files;

private static final Set<String> IGNORED_DIR_NAMES = Set.of(".git", ".hg", ".svn");

private static final Set<String> IGNORED_FILE_NAMES = Set.of(".gitignore");

/**
* This isn't ideal, but Files.walk/Files.find don't skip subtrees, so if you have very large
* ignored directories (such as node_modules, target, etc.) they are still walked in full,
* which takes a lot of time. In the future, let's replace this with a Java stream so that
* we can parallelize all of this!
private List<Path> walk(final Path path, final String baseReplacementPattern, final GitIgnoreFileSet ignoredFileSet) throws IOException {
final List<Path> matches = new ArrayList<>();
Files.walkFileTree(path, new FileVisitor<>() {
@Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
if (IGNORED_DIR_NAMES.contains(dir.getFileName().toString()) ||
ignoredFileSet.ignoreFile(dir.toString().replaceFirst(baseReplacementPattern, ""))) {
return FileVisitResult.SKIP_SUBTREE;
}
else {
return FileVisitResult.CONTINUE;
}
}

private GitIgnore buildGitIgnore() throws IOException {
if (!noGitIgnore) {
if (gitignoreFile == null && Files.exists(DEFAULT_GIT_IGNORE_PATH)) {
return new GitIgnore(DEFAULT_GIT_IGNORE_PATH.toFile());
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
if (!IGNORED_FILE_NAMES.contains(file.getFileName().toString()) &&
ignoredFileSet.keepFile(file.toString().replaceFirst(baseReplacementPattern, ""))) {
matches.add(file);
}
return FileVisitResult.CONTINUE;
}
else if (gitignoreFile != null) {
return new GitIgnore(gitignoreFile.toFile());

@Override
public FileVisitResult visitFileFailed(Path file, IOException exc) {
matches.remove(file);
return FileVisitResult.CONTINUE;
}
else {
return null;

@Override
public FileVisitResult postVisitDirectory(Path dir, IOException exc) {
return FileVisitResult.CONTINUE;
}
}
else {
return null;
}
});
return matches;
}

@Override
public Integer call() {
if (!Files.exists(codeownersFile)) {
throw new IllegalArgumentException(String.format("CODEOWNERS not found: %s", codeownersFile));
if (basePath == null || !Files.exists(basePath) || !basePath.toFile().isDirectory()) {
throw new IllegalArgumentException("Base path could not found!");
}

if (gitignoreFile != null && !Files.exists(gitignoreFile)) {
throw new IllegalArgumentException(String.format(".gitignore not found: %s", gitignoreFile));
codeownersFile = codeownersFile == null
? Stream.of("CODEOWNERS",
".github/CODEOWNERS",
".gitlab/CODEOWNERS",
"docs/CODEOWNERS")
.map(p -> basePath.resolve(p))
.filter(Files::exists)
.findFirst()
.orElse(null)
: codeownersFile;

if (codeownersFile == null || !Files.exists(codeownersFile)) {
throw new IllegalArgumentException("No CODEOWNERS file found!");
}

files.forEach(path -> {
if (!Files.exists(path)) {
throw new IllegalArgumentException(String.format("Path not found: %s", path));
throw new IllegalArgumentException(String.format("File or directory not found: %s", path));
}
});

// We shell out to git to list the .gitignore files, because naively walking the tree to scan for every
// .gitignore file may be very costly if the project contains folders such as node_modules, target, .m2, etc.
try {
final GitIgnore gitIgnore = buildGitIgnore();
final GitIgnoreFileSet ignoredFileSet = new GitIgnoreFileSet(new File("./"), false)
.assumeQueriesAreProjectRelative();
if (useGit) {
final Process listGitIgnoreFilesProcess = new ProcessBuilder("git", "ls-files", "*.gitignore")
.directory(basePath.toFile())
.start();
listGitIgnoreFilesProcess.waitFor(1, TimeUnit.MINUTES);
final Pattern gitIgnorePattern = Pattern.compile("^(.*)\\.gitignore$");
new BufferedReader(new InputStreamReader(listGitIgnoreFilesProcess.getInputStream(), StandardCharsets.UTF_8))
.lines()
.filter(s -> !s.isBlank())
.forEach(s -> {
try {
final Matcher matcher = gitIgnorePattern.matcher(s);
matcher.matches();
ignoredFileSet.add(new GitIgnore(matcher.group(1), basePath.resolve(Path.of(s)).toFile()));
}
catch (IOException e) {
throw new RuntimeException(e);
}
});
}

final CodeOwners codeOwners = new CodeOwners(codeownersFile.toFile());
if (codeOwners.hasStructuralProblems()) {
throw new RuntimeException("CodeOwners has structural issues!");
}

final String baseReplacementPattern = "^" + Pattern.quote(basePath.toString());

final Stream<Path> allPotentialFiles = files.stream()
.flatMap(path -> {
try {
return Files.find(path,
Integer.MAX_VALUE,
(filePath, basicFileAttributes) -> basicFileAttributes.isRegularFile() &&
!filePath.toFile().isHidden() &&
(!ignoreDotFiles || !filePath.toString().contains("/.")) &&
(gitIgnore == null || !Boolean.TRUE.equals(gitIgnore.isIgnoredFile(filePath.toString()))));
return walk(path, baseReplacementPattern, ignoredFileSet).stream();
}
catch (IOException e) {
throw new RuntimeException(e);
Expand All @@ -123,8 +189,8 @@ public Integer call() {
.stream()
.sorted();

final List<Map.Entry<Path, String>> matchEntries = allPotentialFiles
.map(filePath -> Map.entry(filePath, codeOwners.getAllApprovers(filePath.toString())))
final List<Map.Entry<String, String>> matchEntries = allPotentialFiles
.map(filePath -> Map.entry(filePath, codeOwners.getAllApprovers(filePath.toString().replaceFirst(baseReplacementPattern, "."))))
.filter(entry -> {
final List<String> approvers = entry.getValue();
if (owners != null) {
Expand All @@ -138,7 +204,7 @@ else if (unownedFilesOnly) {
}
})
.map(entry -> Map.entry(
entry.getKey(),
entry.getKey().toString().replaceFirst(baseReplacementPattern, ""),
entry.getValue().isEmpty() ? "(Unowned)" : String.join(", ", entry.getValue())))
.toList();

Expand All @@ -149,7 +215,7 @@ else if (unownedFilesOnly) {
else {
final int maxFileLength = Math.max(matchEntries
.stream()
.mapToInt(entry -> entry.getKey().toString().length())
.mapToInt(entry -> entry.getKey().length())
.max()
.getAsInt(), 12);
final int maxCodeOwnersLength = Math.max(matchEntries
Expand All @@ -160,12 +226,12 @@ else if (unownedFilesOnly) {

final String format = "%" + maxFileLength + "s | %" + maxCodeOwnersLength + "s\n";
System.out.printf(format, "File", "Approvers");
matchEntries.forEach(entry -> System.out.printf(format, entry.getKey().toString(), entry.getValue()));
matchEntries.forEach(entry -> System.out.printf(format, entry.getKey(), entry.getValue()));

return failOnOutput ? 1 : 0;
return failOnMatches ? 1 : 0;
}
}
catch (IOException e) {
catch (IOException | InterruptedException e) {
throw new RuntimeException(e);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,25 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.Callable;

import nl.basjes.codeowners.CodeOwners;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;

import static picocli.CommandLine.Help.Visibility.ALWAYS;

/**
* @author vincentjames501
* @version 0.0.1
* @since 2024-July-15
*/
@Command(name = "verify", description = "Verifies the format of the CODEOWNERS file", subcommands = CommandLine.HelpCommand.class)
@Command(name = "verify", description = "Verifies the format of the CODEOWNERS file.", subcommands = CommandLine.HelpCommand.class)
public class Verify implements Callable<Integer> {
@Option(names = { "-cf", "--codeowners-file" }, description = "Specify the path to the CODEOWNERS file.")
Path codeownersFile = Paths.get("./CODEOWNERS");
@Option(names = { "-c",
"--codeowners-file" }, description = "Specify the path to the CODEOWNERS file.", defaultValue = "./CODEOWNERS", required = true, showDefaultValue = ALWAYS)
Path codeownersFile;

@Override
public Integer call() {
Expand Down
Loading

0 comments on commit 2e34b31

Please sign in to comment.