Skip to content

Commit

Permalink
Try to parallelize
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Dec 14, 2022
1 parent 1cbd46f commit 7ecf000
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 88 deletions.
1 change: 1 addition & 0 deletions resources/test/project/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
examples/generated
29 changes: 18 additions & 11 deletions resources/test/project/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,37 @@ Running from the repo root should pick up and enforce the appropriate settings f

```
∴ cargo run resources/test/project/
Found 5 error(s).
Found 7 error(s).
resources/test/project/examples/.dotfiles/script.py:1:8: F401 `os` imported but unused
resources/test/project/examples/.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/examples/docs/docs/file.py:1:1: I001 Import block is un-sorted or un-formatted
resources/test/project/examples/docs/docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
resources/test/project/src/file.py:1:8: F401 `os` imported but unused
resources/test/project/src/file.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/src/import_file.py:1:1: I001 Import block is un-sorted or un-formatted
3 potentially fixable with the --fix option.
4 potentially fixable with the --fix option.
```

Running from the project directory itself should exhibit the same behavior:

```
∴ cd resources/test/project/ && cargo run .
Found 5 error(s).
∴ (cd resources/test/project/ && cargo run .)
Found 7 error(s).
examples/.dotfiles/script.py:1:8: F401 `os` imported but unused
examples/.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used
examples/docs/docs/file.py:1:1: I001 Import block is un-sorted or un-formatted
examples/docs/docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
src/file.py:1:8: F401 `os` imported but unused
src/file.py:5:5: F841 Local variable `x` is assigned to but never used
src/import_file.py:1:1: I001 Import block is un-sorted or un-formatted
3 potentially fixable with the --fix option.
4 potentially fixable with the --fix option.
```

Running from the sub-package directory should exhibit the same behavior, but omit the top-level
files:

```
∴ cd resources/test/project/examples/docs && cargo run .
(cd resources/test/project/examples/docs && cargo run .)
Found 2 error(s).
docs/file.py:1:1: I001 Import block is un-sorted or un-formatted
docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
Expand All @@ -46,8 +50,10 @@ docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
file paths from the current working directory:

```
∴ cargo run -- --config=resources/test/project/pyproject.toml resources/test/project/
Found 9 error(s).
∴ (cargo run -- --config=resources/test/project/pyproject.toml resources/test/project/)
Found 11 error(s).
resources/test/project/examples/.dotfiles/script.py:1:8: F401 `os` imported but unused
resources/test/project/examples/.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/examples/docs/docs/concepts/file.py:1:8: F401 `os` imported but unused
resources/test/project/examples/docs/docs/concepts/file.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/examples/docs/docs/file.py:1:8: F401 `os` imported but unused
Expand All @@ -57,15 +63,16 @@ resources/test/project/examples/docs/docs/file.py:8:5: F841 Local variable `x` i
resources/test/project/src/file.py:1:8: F401 `os` imported but unused
resources/test/project/src/file.py:5:5: F841 Local variable `x` is assigned to but never used
resources/test/project/src/import_file.py:1:1: I001 Import block is un-sorted or un-formatted
6 potentially fixable with the --fix option.
7 potentially fixable with the --fix option.
```

Running from a parent directory should this "ignore" the `exclude` (hence, `concepts/file.py` gets
included in the output):

```
∴ cd resources/test/project/examples && cargo run -- --config=docs/pyproject.toml .
Found 3 error(s).
∴ (cd resources/test/project/examples && cargo run -- --config=docs/pyproject.toml .)
Found 4 error(s).
.dotfiles/script.py:5:5: F841 Local variable `x` is assigned to but never used
docs/docs/concepts/file.py:5:5: F841 Local variable `x` is assigned to but never used
docs/docs/file.py:1:1: I001 Import block is un-sorted or un-formatted
docs/docs/file.py:8:5: F841 Local variable `x` is assigned to but never used
Expand Down
5 changes: 5 additions & 0 deletions resources/test/project/examples/.dotfiles/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import os


def f():
x = 1
10 changes: 5 additions & 5 deletions src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ pub fn run(
) -> Result<Diagnostics> {
// Collect all the files to check.
let start = Instant::now();
let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?;
let duration = start.elapsed();
debug!("Identified files to lint in: {:?}", duration);

Expand Down Expand Up @@ -116,7 +116,7 @@ pub fn run_stdin(
pub fn add_noqa(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<usize> {
// Collect all the files to check.
let start = Instant::now();
let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?;
let duration = start.elapsed();
debug!("Identified files to lint in: {:?}", duration);

Expand Down Expand Up @@ -146,7 +146,7 @@ pub fn add_noqa(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -
pub fn autoformat(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<usize> {
// Collect all the files to format.
let start = Instant::now();
let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?;
let duration = start.elapsed();
debug!("Identified files to lint in: {:?}", duration);

Expand Down Expand Up @@ -175,7 +175,7 @@ pub fn autoformat(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides)
/// Print the user-facing configuration settings.
pub fn show_settings(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<()> {
// Collect all files in the hierarchy.
let (paths, resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, resolver) = resolver::python_files_in_path(files, strategy, overrides)?;

// Print the list of files.
let Some(entry) = paths
Expand All @@ -195,7 +195,7 @@ pub fn show_settings(files: &[PathBuf], strategy: &Strategy, overrides: &Overrid
/// Show the list of files to be checked based on current settings.
pub fn show_files(files: &[PathBuf], strategy: &Strategy, overrides: &Overrides) -> Result<()> {
// Collect all files in the hierarchy.
let (paths, _resolver) = resolver::resolve_python_files(files, strategy, overrides)?;
let (paths, _resolver) = resolver::python_files_in_path(files, strategy, overrides)?;

// Print the list of files.
for entry in paths
Expand Down
156 changes: 84 additions & 72 deletions src/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::sync::RwLock;

use anyhow::{bail, Result};
use ignore::{DirEntry, Walk, WalkBuilder, WalkParallel};
use itertools::Itertools;
use anyhow::{anyhow, bail, Result};
use ignore::{DirEntry, WalkBuilder, WalkState};
use log::debug;
use path_absolutize::path_dedot;
use rustc_hash::FxHashSet;
Expand Down Expand Up @@ -170,35 +170,20 @@ fn is_python_file(path: &Path) -> bool {
.map_or(false, |ext| ext == "py" || ext == "pyi")
}

/// Find all Python (`.py` and `.pyi` files) in a set of `PathBuf`s.
pub fn resolve_python_files(
/// Find all Python (`.py` and `.pyi` files) in a set of paths.
pub fn python_files_in_path(
paths: &[PathBuf],
strategy: &Strategy,
overrides: &Overrides,
) -> Result<(Vec<Result<DirEntry, ignore::Error>>, Resolver)> {
let mut files = Vec::new();
let mut resolver = Resolver::default();
for path in paths {
let (files_in_path, file_resolver) = python_files_in_path(path, strategy, overrides)?;
files.extend(files_in_path);
resolver.merge(file_resolver);
}
Ok((files, resolver))
}

/// Find all Python (`.py` and `.pyi` files) in a given `Path`.
fn python_files_in_path(
path: &Path,
strategy: &Strategy,
overrides: &Overrides,
) -> Result<(Vec<Result<DirEntry, ignore::Error>>, Resolver)> {
let path = fs::normalize_path(path);
// Normalize every path (e.g., convert from relative to absolute).
let paths: Vec<PathBuf> = paths.iter().map(|path| fs::normalize_path(path)).collect();

// Search for `pyproject.toml` files in all parent directories.
let mut resolver = Resolver::default();
for path in path.ancestors() {
if path.is_dir() {
let pyproject = path.join("pyproject.toml");
for path in &paths {
for ancestor in path.ancestors() {
let pyproject = ancestor.join("pyproject.toml");
if pyproject.is_file() {
let (root, settings) =
resolve_scoped_settings(&pyproject, &Relativity::Parent, Some(overrides))?;
Expand All @@ -207,60 +192,87 @@ fn python_files_in_path(
}
}

// Collect all Python files.
// Maybe every time we hit a `pyproject.toml` directory, we stop?
let files: Vec<Result<DirEntry, ignore::Error>> = WalkParallel::new(path)
.filter_entry(|entry| {
// Search for the `pyproject.toml` file in this directory, before we visit any
// of its contents.
if entry
.file_type()
.map_or(false, |file_type| file_type.is_dir())
{
let pyproject = entry.path().join("pyproject.toml");
if pyproject.is_file() {
// TODO(charlie): Return a `Result` here.
let (root, settings) =
resolve_scoped_settings(&pyproject, &Relativity::Parent, Some(overrides))
.unwrap();
resolver.add(root, settings);
// Create the `WalkBuilder`.
let mut builder = WalkBuilder::new(
paths
.get(0)
.ok_or_else(|| anyhow!("Expected at least one path to search for Python files"))?,
);
for path in &paths[1..] {
builder.add(path);
}
let walker = builder.hidden(false).build_parallel();

// Run the `WalkParallel` to collect all Python files.
let error: std::sync::Mutex<Result<()>> = std::sync::Mutex::new(Ok(()));
let resolver: RwLock<Resolver> = RwLock::new(resolver);
let files: std::sync::Mutex<Vec<Result<DirEntry, ignore::Error>>> =
std::sync::Mutex::new(vec![]);
walker.run(|| {
Box::new(|result| {
if let Ok(entry) = &result {
// Search for the `pyproject.toml` file in this directory, before we visit any
// of its contents.
if entry
.file_type()
.map_or(false, |file_type| file_type.is_dir())
{
let pyproject = entry.path().join("pyproject.toml");
if pyproject.is_file() {
match resolve_scoped_settings(
&pyproject,
&Relativity::Parent,
Some(overrides),
) {
Ok((root, settings)) => resolver.write().unwrap().add(root, settings),
Err(err) => {
*error.lock().unwrap() = Err(err);
return WalkState::Quit;
}
}
}
}
}

let path = entry.path();
let settings = resolver.resolve(path, strategy);
match fs::extract_path_names(path) {
Ok((file_path, file_basename)) => {
if !settings.exclude.is_empty()
&& is_excluded(file_path, file_basename, &settings.exclude)
{
debug!("Ignored path via `exclude`: {:?}", path);
false
} else if !settings.extend_exclude.is_empty()
&& is_excluded(file_path, file_basename, &settings.extend_exclude)
{
debug!("Ignored path via `extend-exclude`: {:?}", path);
false
} else {
true
let path = entry.path();
let resolver = resolver.read().unwrap();
let settings = resolver.resolve(path, strategy);
match fs::extract_path_names(path) {
Ok((file_path, file_basename)) => {
if !settings.exclude.is_empty()
&& is_excluded(file_path, file_basename, &settings.exclude)
{
debug!("Ignored path via `exclude`: {:?}", path);
return WalkState::Skip;
} else if !settings.extend_exclude.is_empty()
&& is_excluded(file_path, file_basename, &settings.extend_exclude)
{
debug!("Ignored path via `extend-exclude`: {:?}", path);
return WalkState::Skip;
}
}
Err(e) => {
debug!("Ignored path due to error in parsing: {:?}: {}", path, e);
return WalkState::Skip;
}
}
Err(e) => {
debug!("Ignored path due to error in parsing: {:?}: {}", path, e);
true
}
}

if result.as_ref().map_or(true, |entry| {
(entry.depth() == 0 || is_python_file(entry.path()))
&& !entry
.file_type()
.map_or(false, |file_type| file_type.is_dir())
}) {
files.lock().unwrap().push(result);
}

WalkState::Continue
})
.filter_entry(|entry| {
(entry.depth() == 0 || is_python_file(entry.path()))
&& !entry
.file_type()
.map_or(false, |file_type| file_type.is_dir())
})
.build()
.collect::<Vec<_>>();
});

error.into_inner().unwrap()?;

Ok((files, resolver))
Ok((files.into_inner().unwrap(), resolver.into_inner().unwrap()))
}

#[cfg(test)]
Expand Down

0 comments on commit 7ecf000

Please sign in to comment.