Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(imports): handle errors after processing AST #619

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 29 additions & 26 deletions src/imports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,40 @@ pub fn get_imports_from_py_files(py: Python, file_paths: Vec<&PyString>) -> PyRe
.collect();

// Process each file in parallel and collect results
let results: PyResult<Vec<_>> = rust_file_paths
let results: Vec<_> = rust_file_paths
.par_iter()
.map(|path_str| _get_imports_from_py_file(path_str))
.map(|path_str| match _get_imports_from_py_file(path_str) {
Ok(result) => (path_str, Ok(result)),
Err(e) => (path_str, Err(e)),
})
.collect();

let results = results?;

// Merge results from each thread
let mut all_imports = HashMap::new();
for file_result in results {
for (module, locations) in file_result {
all_imports
.entry(module)
.or_insert_with(Vec::new)
.extend(locations);
let mut errors = Vec::new();

for (path, file_result) in results {
match file_result {
Ok(file_result) => {
for (module, locations) in file_result {
all_imports
.entry(module)
.or_insert_with(Vec::new)
.extend(locations);
}
}
Err(e) => errors.push((path.to_string(), e)),
}
}

for (path, error) in errors {
log::warn!(
"Warning: Skipping processing of {} because of the following error: \"{}\".",
path,
error
);
}

convert_to_python_dict(py, all_imports)
}

Expand All @@ -60,32 +76,19 @@ pub fn get_imports_from_py_file(py: Python, file_path: &PyString) -> PyResult<Py
/// Core helper function that extracts import statements and their locations from the content of a single Python file.
/// Used internally by both parallel and single file processing functions.
fn _get_imports_from_py_file(path_str: &str) -> PyResult<HashMap<String, Vec<Location>>> {
let file_content = match read_file(path_str) {
Ok(content) => content,
Err(_) => {
log::warn!("Warning: File {} could not be read. Skipping...", path_str);
return Ok(HashMap::new());
}
};
let file_content = read_file(path_str)?;

let ast = get_ast_from_file_content(&file_content, path_str)
.map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", path_str, e)))?;
let ast = parse(&file_content, Mode::Module, path_str)
.map_err(|e| PySyntaxError::new_err(e.to_string()))?;

let imported_modules = extract_imports_from_ast(ast);

Ok(convert_imports_with_textranges_to_location_objects(
imported_modules,
path_str,
&file_content,
))
}

/// Parses the content of a Python file into an abstract syntax tree (AST).
pub fn get_ast_from_file_content(file_content: &str, file_path: &str) -> PyResult<Mod> {
parse(file_content, Mode::Module, file_path)
.map_err(|e| PySyntaxError::new_err(format!("Error parsing file {}: {}", file_path, e)))
}

/// Iterates through an AST to identify and collect import statements, and returns them together with their
/// respective TextRange for each occurrence.
fn extract_imports_from_ast(ast: Mod) -> HashMap<String, Vec<TextRange>> {
Expand Down
34 changes: 24 additions & 10 deletions tests/unit/imports/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,19 +122,33 @@ def test_import_parser_file_encodings_ipynb(code_cell_content: list[str], encodi
assert get_imported_modules_from_list_of_files([random_file]) == {"foo": [Location(random_file, 1, 0)]}


def test_import_parser_file_encodings_warning(tmp_path: Path, caplog: LogCaptureFixture) -> None:
file_path = Path("file1.py")
def test_import_parser_errors(tmp_path: Path, caplog: LogCaptureFixture) -> None:
file_ok = Path("file_ok.py")
file_with_bad_encoding = Path("file_with_bad_encoding.py")
file_with_syntax_error = Path("file_with_syntax_error.py")

with run_within_dir(tmp_path):
# The characters below are represented differently in ISO-8859-1 and UTF-8, so this should raise an error.
with file_path.open("w", encoding="ISO-8859-1") as f:
with file_ok.open("w") as f:
f.write("import black")

with file_with_bad_encoding.open("w", encoding="ISO-8859-1") as f:
f.write("# -*- coding: utf-8 -*-\nprint('ÆØÅ')")

with caplog.at_level(logging.WARNING):
assert get_imported_modules_from_list_of_files([file_path]) == {}
with file_with_syntax_error.open("w") as f:
f.write("invalid_syntax:::")

# //TODO logging from Rust still includes its own warning and file + line number. Can we get rid of that?
pattern = re.compile(
r"WARNING deptry.imports:imports.rs:\d+ Warning: File file1.py could not be read. Skipping...\n"
with caplog.at_level(logging.WARNING):
assert get_imported_modules_from_list_of_files([
file_ok,
file_with_bad_encoding,
file_with_syntax_error,
]) == {"black": [Location(file=Path("file_ok.py"), line=1, column=8)]}

assert re.search(
r"WARNING deptry.imports:imports.rs:\d+ Warning: Skipping processing of file_with_bad_encoding.py because of the following error: \"OSError: Failed to decode file content with the detected encoding.\".",
caplog.text,
)
assert re.search(
r"WARNING deptry.imports:imports.rs:\d+ Warning: Skipping processing of file_with_syntax_error.py because of the following error: \"SyntaxError: invalid syntax. Got unexpected token ':' at byte offset 15\".",
caplog.text,
)
assert pattern.search(caplog.text) is not None