diff --git a/CHANGES.md b/CHANGES.md index 456f42d6..173f7a9f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -22,6 +22,10 @@ Unreleased `` instead of `` should be used to disable checking for links in file, so it's clearer that file itself is not ignored (and links can target it). +* [#215](https://github.com/serokell/xrefcheck/pull/215) + + Now we notify user when there are scannable files that were not added to Git + yet. Also added CLI option `--include-untracked` to scan such files and treat + as existing. 0.2.2 ========== diff --git a/src/Xrefcheck/CLI.hs b/src/Xrefcheck/CLI.hs index 48cef339..1aa809d1 100644 --- a/src/Xrefcheck/CLI.hs +++ b/src/Xrefcheck/CLI.hs @@ -81,6 +81,7 @@ data Options = Options , oColorMode :: ColorMode , oExclusionOptions :: ExclusionOptions , oNetworkingOptions :: NetworkingOptions + , oScanPolicy :: ScanPolicy } data ExclusionOptions = ExclusionOptions @@ -181,9 +182,13 @@ optionsParser = do ] oColorMode <- flag WithColors WithoutColors $ long "no-color" <> - help "Disable ANSI coloring of output" + help "Disable ANSI coloring of output." oExclusionOptions <- exclusionOptionsParser oNetworkingOptions <- networkingOptionsParser + oScanPolicy <- flag OnlyTracked IncludeUntracked $ + long "include-untracked" <> + help "Scan and treat as existing files that were not added to Git.\ + \ Files explicitly ignored by Git are always ignored by xrefcheck." return Options{..} exclusionOptionsParser :: Parser ExclusionOptions diff --git a/src/Xrefcheck/Command.hs b/src/Xrefcheck/Command.hs index adaf741b..16620a86 100644 --- a/src/Xrefcheck/Command.hs +++ b/src/Xrefcheck/Command.hs @@ -70,7 +70,7 @@ defaultAction Options{..} = do (ScanResult scanErrs repoInfo) <- allowRewrite showProgressBar $ \rw -> do let fullConfig = addExclusionOptions (cExclusions config) oExclusionOptions - scanRepo rw (formats $ cScanners config) fullConfig oRoot + scanRepo oScanPolicy rw (formats $ cScanners config) fullConfig oRoot when oVerbose $ fmt [int|| diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index ffcc268c..454606db 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -122,12 +122,36 @@ makeLenses ''FileInfo instance Default FileInfo where def = diffToFileInfo mempty +data ScanPolicy + = OnlyTracked + -- ^ Scan and treat as existing only files tracked by Git. + -- Warn when there are scannable files not added to Git yet. + | IncludeUntracked + -- ^ Also scan and treat as existing + -- files that were neither tracked nor ignored by Git. + deriving stock (Show, Eq) + +data FileStatus + = Scanned FileInfo + | NotScannable + -- ^ Files that are not supported by our scanners + | NotAddedToGit + -- ^ We are not scanning files that are not added to git + -- unless --include-untracked CLI option was enabled, but we're + -- gathering information about them to improve reports. + deriving stock (Show) + +data DirectoryStatus + = TrackedDirectory + | UntrackedDirectory + deriving stock (Show) + -- | All tracked files and directories. data RepoInfo = RepoInfo - { riFiles :: Map FilePath (Maybe FileInfo) - -- ^ Files from the repo with `FileInfo` attached to files that we can scan. - , riDirectories :: Set FilePath - -- ^ Tracked directories. + { riFiles :: Map FilePath FileStatus + -- ^ Files from the repo with `FileInfo` attached to files that we've scanned. + , riDirectories :: Map FilePath DirectoryStatus + -- ^ Directories containing those files. } deriving stock (Show) ----------------------------------------------------------- @@ -180,8 +204,9 @@ instance Given ColorMode => Buildable FileInfo where |] instance Given ColorMode => Buildable RepoInfo where - build (RepoInfo (nonEmpty . mapMaybe sequence . toPairs -> Just m) _) = - interpolateBlockListF' "⮚" buildFileReport m + build (RepoInfo m _) + | Just scanned <- nonEmpty [(name, info) | (name, Scanned info) <- toPairs m] + = interpolateBlockListF' "⮚" buildFileReport scanned where buildFileReport (name, info) = [int|| diff --git a/src/Xrefcheck/Scan.hs b/src/Xrefcheck/Scan.hs index 68163c5f..175d6f8a 100644 --- a/src/Xrefcheck/Scan.hs +++ b/src/Xrefcheck/Scan.hs @@ -14,6 +14,7 @@ module Xrefcheck.Scan , ScanAction , FormatsSupport , RepoInfo (..) + , ReadDirectoryMode(..) , ScanError (..) , ScanErrorDescription (..) , ScanResult (..) @@ -138,18 +139,41 @@ specificFormatsSupport formats = \ext -> M.lookup ext formatsMap , extension <- extensions ] --- | Process files that are tracked by git and not ignored by the config. +data ReadDirectoryMode + = RdmTracked + -- ^ Consider files tracked by Git, obtained from "git ls-files" + | RdmUntracked + -- ^ Consider files that are not tracked nor ignored by Git, obtained from + -- "git ls-files --others --exclude-standard" + | RdmBothTrackedAndUtracked + -- ^ Combine output from commands listed above, so we consider all files + -- except ones that are explicitly ignored by Git + +-- | Process files that match given @ReadDirectoryMode@ and aren't ignored by the config. readDirectoryWith - :: forall a. ExclusionConfig + :: forall a. ReadDirectoryMode + -> ExclusionConfig -> (FilePath -> IO a) -> FilePath -> IO [(FilePath, a)] -readDirectoryWith config scanner root = +readDirectoryWith mode config scanner root = traverse scanFile . filter (not . isIgnored) . fmap (location ) - . L.lines =<< readCreateProcess (shell "git ls-files"){cwd = Just root} "" + . L.lines =<< getFiles + where + + getFiles = case mode of + RdmTracked -> getTrackedFiles + RdmUntracked -> getUntrackedFiles + RdmBothTrackedAndUtracked -> liftA2 (<>) getTrackedFiles getUntrackedFiles + + getTrackedFiles = readCreateProcess + (shell "git ls-files"){cwd = Just root} "" + getUntrackedFiles = readCreateProcess + (shell "git ls-files --others --exclude-standard"){cwd = Just root} "" + scanFile :: FilePath -> IO (FilePath, a) scanFile = sequence . (normaliseWithNoTrailing &&& scanner) @@ -165,21 +189,48 @@ readDirectoryWith config scanner root = scanRepo :: MonadIO m - => Rewrite -> FormatsSupport -> ExclusionConfig -> FilePath -> m ScanResult -scanRepo rw formatsSupport config root = do + => ScanPolicy -> Rewrite -> FormatsSupport -> ExclusionConfig -> FilePath -> m ScanResult +scanRepo scanMode rw formatsSupport config root = do putTextRewrite rw "Scanning repository..." when (not $ isDirectory root) $ die $ "Repository's root does not seem to be a directory: " <> root - (errs, fileInfos) <- liftIO - $ (gatherScanErrs &&& gatherFileInfos) - <$> readDirectoryWith config processFile root - - let dirs = fromList $ foldMap (getDirs . fst) fileInfos + (errs, processedFiles) <- + let mode = case scanMode of + OnlyTracked -> RdmTracked + IncludeUntracked -> RdmBothTrackedAndUtracked + in liftIO + $ (gatherScanErrs &&& gatherFileStatuses) + <$> readDirectoryWith mode config processFile root + + notProcessedFiles <- case scanMode of + OnlyTracked -> liftIO $ + readDirectoryWith RdmUntracked config (const $ pure NotAddedToGit) root + IncludeUntracked -> pure [] + + let scannableNotProcessedFiles = filter (isJust . mscanner . fst) notProcessedFiles + + whenJust (nonEmpty $ map fst scannableNotProcessedFiles) $ \files -> hPutStrLn @Text stderr + [int|A| + Those files are not added by Git, so we're not scanning them: + #{interpolateBlockListF files} + Please run "git add" before running xrefcheck or enable \ + --include-untracked CLI option to check these files. + |] - return . ScanResult errs $ RepoInfo (M.fromList fileInfos) dirs + let trackedDirs = foldMap (getDirs . fst) processedFiles + untrackedDirs = foldMap (getDirs . fst) notProcessedFiles + return . ScanResult errs $ RepoInfo + { riFiles = M.fromList $ processedFiles <> notProcessedFiles + , riDirectories = M.fromList + $ map (, TrackedDirectory) trackedDirs + <> map (, UntrackedDirectory) untrackedDirs + } where + mscanner :: FilePath -> Maybe ScanAction + mscanner = formatsSupport . takeExtension + isDirectory :: FilePath -> Bool isDirectory = readingSystem . doesDirectoryExist @@ -188,20 +239,19 @@ scanRepo rw formatsSupport config root = do getDirs = scanl () "" . splitDirectories . takeDirectory gatherScanErrs - :: [(FilePath, Maybe (FileInfo, [ScanError]))] + :: [(FilePath, (FileStatus, [ScanError]))] -> [ScanError] - gatherScanErrs = fold . mapMaybe (fmap snd . snd) - - gatherFileInfos - :: [(FilePath, Maybe (FileInfo, [ScanError]))] - -> [(FilePath, Maybe FileInfo)] - gatherFileInfos = map (second (fmap fst)) - - processFile :: FilePath -> IO $ Maybe (FileInfo, [ScanError]) - processFile file = do - let ext = takeExtension file - let mscanner = formatsSupport ext - forM mscanner ($ file) + gatherScanErrs = foldMap (snd . snd) + + gatherFileStatuses + :: [(FilePath, (FileStatus, [ScanError]))] + -> [(FilePath, FileStatus)] + gatherFileStatuses = map (second fst) + + processFile :: FilePath -> IO (FileStatus, [ScanError]) + processFile file = case mscanner file of + Nothing -> pure (NotScannable, []) + Just scanner -> scanner file <&> _1 %~ Scanned ----------------------------------------------------------- -- Yaml instances diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index eb884114..1d236de7 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -120,6 +120,7 @@ instance (Given ColorMode, Buildable a) => Buildable (WithReferenceLoc a) where data VerifyError = LocalFileDoesNotExist FilePath | LocalFileOutsideRepo FilePath + | LinkTargetNotAddedToGit FilePath | AnchorDoesNotExist Text [Anchor] | AmbiguousAnchorRef FilePath Text (NonEmpty Anchor) | ExternalResourceInvalidUri URIBS.URIParseError @@ -148,6 +149,14 @@ instance Given ColorMode => Buildable VerifyError where #{file} |] + + LinkTargetNotAddedToGit file -> + [int|| + ⛀ Link target is not tracked by Git: + #{file} + Please run "git add" before running xrefcheck or enable --include-untracked CLI option. + |] + AnchorDoesNotExist anchor similar -> case nonEmpty similar of Nothing -> [int|| @@ -339,10 +348,13 @@ verifyRepo (file, fileInfo) <- M.toList files guard . not $ matchesGlobPatterns root (ecIgnoreRefsFrom cExclusions) file case fileInfo of - Just fi -> do + Scanned fi -> do ref <- _fiReferences fi return (file, ref) - Nothing -> empty -- no support for such file, can do nothing + NotScannable -> empty -- No support for such file, can do nothing. + NotAddedToGit -> empty -- If this file is scannable, we've notified + -- user that we are scanning only files + -- added to Git while gathering RepoInfo. progressRef <- newIORef $ initVerifyProgress (map snd toScan) @@ -504,14 +516,15 @@ verifyReference checkRef mAnchor referredFile = verifying $ unless (isVirtual referredFile) do checkReferredFileIsInsideRepo referredFile - checkReferredFileExists referredFile - case lookupFilePath referredFile $ M.toList files of - Nothing -> pass -- no support for such file, can do nothing - Just referredFileInfo -> whenJust mAnchor $ + mFileStatus <- tryGetFileStatus referredFile + case mFileStatus of + Right (Scanned referredFileInfo) -> whenJust mAnchor $ checkAnchor referredFile (_fiAnchors referredFileInfo) - - lookupFilePath :: FilePath -> [(FilePath, Maybe FileInfo)] -> Maybe FileInfo - lookupFilePath fp = snd <=< find (equalFilePath (expandIndirections fp) . fst) + Right NotScannable -> pass -- no support for such file, can do nothing + Right NotAddedToGit -> throwError (LinkTargetNotAddedToGit referredFile) + Left UntrackedDirectory -> throwError (LinkTargetNotAddedToGit referredFile) + Left TrackedDirectory -> pass -- path leads to directory, currently + -- if such link contain anchor, we ignore it -- expands ".." and "." -- expandIndirections "a/b/../c" = "a/c" @@ -545,18 +558,21 @@ verifyReference nestingChange "." = 0 nestingChange _ = 1 - checkReferredFileExists file = do - unless (fileExists || dirExists) $ - throwError (LocalFileDoesNotExist file) + -- Returns `Nothing` when path corresponds to an existing (and tracked) directory + tryGetFileStatus :: FilePath -> ExceptT VerifyError IO (Either DirectoryStatus FileStatus) + tryGetFileStatus file + | Just f <- mFile = return $ Right f + | Just d <- mDir = return $ Left d + | otherwise = throwError (LocalFileDoesNotExist file) where matchesFilePath :: FilePath -> Bool matchesFilePath = equalFilePath $ expandIndirections file - fileExists :: Bool - fileExists = any matchesFilePath $ M.keys files + mFile :: Maybe FileStatus + mFile = (files M.!) <$> find matchesFilePath (M.keys files) - dirExists :: Bool - dirExists = any matchesFilePath dirs + mDir :: Maybe DirectoryStatus + mDir = (dirs M.!) <$> find matchesFilePath (M.keys dirs) checkAnchor file fileAnchors anchor = do checkAnchorReferenceAmbiguity file fileAnchors anchor diff --git a/tests/Test/Xrefcheck/IgnoreRegexSpec.hs b/tests/Test/Xrefcheck/IgnoreRegexSpec.hs index c206a062..c3143086 100644 --- a/tests/Test/Xrefcheck/IgnoreRegexSpec.hs +++ b/tests/Test/Xrefcheck/IgnoreRegexSpec.hs @@ -39,7 +39,7 @@ test_ignoreRegex = give WithoutColors $ in testGroup "Regular expressions performance" [ testCase "Check that only not matched links are verified" $ do scanResult <- allowRewrite showProgressBar $ \rw -> - scanRepo rw formats (config ^. cExclusionsL) root + scanRepo OnlyTracked rw formats (config ^. cExclusionsL) root verifyRes <- allowRewrite showProgressBar $ \rw -> verifyRepo rw config verifyMode root $ srRepoInfo scanResult diff --git a/tests/Test/Xrefcheck/TrailingSlashSpec.hs b/tests/Test/Xrefcheck/TrailingSlashSpec.hs index a16e2d95..8f0f0a1a 100644 --- a/tests/Test/Xrefcheck/TrailingSlashSpec.hs +++ b/tests/Test/Xrefcheck/TrailingSlashSpec.hs @@ -28,7 +28,7 @@ test_slash = testGroup "Trailing forward slash detection" $ root <> "\" should exist") $ do (ScanResult _ (RepoInfo repoInfo _)) <- allowRewrite False $ \rw -> - scanRepo rw format (cExclusions config & ecIgnoreL .~ []) root + scanRepo OnlyTracked rw format (cExclusions config & ecIgnoreL .~ []) root nonExistentFiles <- lefts <$> forM (keys repoInfo) (\filePath -> do predicate <- doesFileExist filePath return $ if predicate diff --git a/tests/golden/check-git/check-git.bats b/tests/golden/check-git/check-git.bats index f1092be4..fb614e44 100644 --- a/tests/golden/check-git/check-git.bats +++ b/tests/golden/check-git/check-git.bats @@ -17,7 +17,7 @@ load '../helpers' assert_output --partial "fatal: not a git repository" } -@test "Git: file not tracked" { +@test "Git: bad file not tracked" { cd $TEST_TEMP_DIR git init @@ -26,10 +26,44 @@ load '../helpers' run xrefcheck + assert_success + assert_output --partial "All repository links are valid." + + # this is printed to stderr + assert_output --partial - <> "git.md" + + to_temp xrefcheck --include-untracked + + assert_diff - <> "git.md" + + touch ./a.md + + git add git.md + + to_temp xrefcheck + + assert_diff - <> "git.md" + + touch ./a.md + + git add git.md + + run xrefcheck --include-untracked + + assert_success + + assert_output --partial "All repository links are valid." +}