From da25917fa6e0cefdc74dff00622f2c1060e0add1 Mon Sep 17 00:00:00 2001 From: Anton Sorokin Date: Wed, 16 Nov 2022 00:48:52 +0200 Subject: [PATCH 1/2] [#200] Warnings about files that weren't added to git yet Problem: after 0.2.2 release, xrefcheck cares only about files that were added to Git. That can be confusing for users (see #200) Solution: If a scannable (currently it means markdown) file is not ignored (by git or via config) and not tracked by git, print a warning to stderr while scanning repo. If a link target such file, change error message from "file not exists" to `Link target is not tracked by Git` Suggest user to run "git add" before running xrefcheck in both cases. To do this, I've changed the `RepoInfo` type, so it also contains information about untracked files now. --- src/Xrefcheck/Core.hs | 37 +++++++++-- src/Xrefcheck/Scan.hs | 88 ++++++++++++++++++++------- src/Xrefcheck/Verify.hs | 48 ++++++++++----- tests/golden/check-git/check-git.bats | 45 +++++++++++++- 4 files changed, 172 insertions(+), 46 deletions(-) diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index ffcc268c..454606db 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -122,12 +122,36 @@ makeLenses ''FileInfo instance Default FileInfo where def = diffToFileInfo mempty +data ScanPolicy + = OnlyTracked + -- ^ Scan and treat as existing only files tracked by Git. + -- Warn when there are scannable files not added to Git yet. + | IncludeUntracked + -- ^ Also scan and treat as existing + -- files that were neither tracked nor ignored by Git. + deriving stock (Show, Eq) + +data FileStatus + = Scanned FileInfo + | NotScannable + -- ^ Files that are not supported by our scanners + | NotAddedToGit + -- ^ We are not scanning files that are not added to git + -- unless --include-untracked CLI option was enabled, but we're + -- gathering information about them to improve reports. + deriving stock (Show) + +data DirectoryStatus + = TrackedDirectory + | UntrackedDirectory + deriving stock (Show) + -- | All tracked files and directories. data RepoInfo = RepoInfo - { riFiles :: Map FilePath (Maybe FileInfo) - -- ^ Files from the repo with `FileInfo` attached to files that we can scan. - , riDirectories :: Set FilePath - -- ^ Tracked directories. + { riFiles :: Map FilePath FileStatus + -- ^ Files from the repo with `FileInfo` attached to files that we've scanned. + , riDirectories :: Map FilePath DirectoryStatus + -- ^ Directories containing those files. } deriving stock (Show) ----------------------------------------------------------- @@ -180,8 +204,9 @@ instance Given ColorMode => Buildable FileInfo where |] instance Given ColorMode => Buildable RepoInfo where - build (RepoInfo (nonEmpty . mapMaybe sequence . toPairs -> Just m) _) = - interpolateBlockListF' "⮚" buildFileReport m + build (RepoInfo m _) + | Just scanned <- nonEmpty [(name, info) | (name, Scanned info) <- toPairs m] + = interpolateBlockListF' "⮚" buildFileReport scanned where buildFileReport (name, info) = [int|| diff --git a/src/Xrefcheck/Scan.hs b/src/Xrefcheck/Scan.hs index 68163c5f..9f0d5d32 100644 --- a/src/Xrefcheck/Scan.hs +++ b/src/Xrefcheck/Scan.hs @@ -14,6 +14,7 @@ module Xrefcheck.Scan , ScanAction , FormatsSupport , RepoInfo (..) + , ReadDirectoryMode(..) , ScanError (..) , ScanErrorDescription (..) , ScanResult (..) @@ -138,18 +139,41 @@ specificFormatsSupport formats = \ext -> M.lookup ext formatsMap , extension <- extensions ] --- | Process files that are tracked by git and not ignored by the config. +data ReadDirectoryMode + = RdmTracked + -- ^ Consider files tracked by Git, obtained from "git ls-files" + | RdmUntracked + -- ^ Consider files that are not tracked nor ignored by Git, obtained from + -- "git ls-files --others --exclude-standard" + | RdmBothTrackedAndUtracked + -- ^ Combine output from commands listed above, so we consider all files + -- except ones that are explicitly ignored by Git + +-- | Process files that match given @ReadDirectoryMode@ and aren't ignored by the config. readDirectoryWith - :: forall a. ExclusionConfig + :: forall a. ReadDirectoryMode + -> ExclusionConfig -> (FilePath -> IO a) -> FilePath -> IO [(FilePath, a)] -readDirectoryWith config scanner root = +readDirectoryWith mode config scanner root = traverse scanFile . filter (not . isIgnored) . fmap (location ) - . L.lines =<< readCreateProcess (shell "git ls-files"){cwd = Just root} "" + . L.lines =<< getFiles + where + + getFiles = case mode of + RdmTracked -> getTrackedFiles + RdmUntracked -> getUntrackedFiles + RdmBothTrackedAndUtracked -> liftA2 (<>) getTrackedFiles getUntrackedFiles + + getTrackedFiles = readCreateProcess + (shell "git ls-files"){cwd = Just root} "" + getUntrackedFiles = readCreateProcess + (shell "git ls-files --others --exclude-standard"){cwd = Just root} "" + scanFile :: FilePath -> IO (FilePath, a) scanFile = sequence . (normaliseWithNoTrailing &&& scanner) @@ -172,14 +196,35 @@ scanRepo rw formatsSupport config root = do when (not $ isDirectory root) $ die $ "Repository's root does not seem to be a directory: " <> root - (errs, fileInfos) <- liftIO - $ (gatherScanErrs &&& gatherFileInfos) - <$> readDirectoryWith config processFile root + (errs, processedFiles) <- liftIO + $ (gatherScanErrs &&& gatherFileStatuses) + <$> readDirectoryWith RdmTracked config processFile root + + notProcessedFiles <- liftIO $ + readDirectoryWith RdmUntracked config (const $ pure NotAddedToGit) root - let dirs = fromList $ foldMap (getDirs . fst) fileInfos + let scannableNotProcessedFiles = filter (isJust . mscanner . fst) notProcessedFiles - return . ScanResult errs $ RepoInfo (M.fromList fileInfos) dirs + whenJust (nonEmpty $ map fst scannableNotProcessedFiles) $ \files -> hPutStrLn @Text stderr + [int|A| + Those files are not added by Git, so we're not scanning them: + #{interpolateBlockListF files} + Please run "git add" before running xrefcheck or enable \ + --include-untracked CLI option to check these files. + |] + + let trackedDirs = foldMap (getDirs . fst) processedFiles + untrackedDirs = foldMap (getDirs . fst) notProcessedFiles + return . ScanResult errs $ RepoInfo + { riFiles = M.fromList $ processedFiles <> notProcessedFiles + , riDirectories = M.fromList + $ map (, TrackedDirectory) trackedDirs + <> map (, UntrackedDirectory) untrackedDirs + } where + mscanner :: FilePath -> Maybe ScanAction + mscanner = formatsSupport . takeExtension + isDirectory :: FilePath -> Bool isDirectory = readingSystem . doesDirectoryExist @@ -188,20 +233,19 @@ scanRepo rw formatsSupport config root = do getDirs = scanl () "" . splitDirectories . takeDirectory gatherScanErrs - :: [(FilePath, Maybe (FileInfo, [ScanError]))] + :: [(FilePath, (FileStatus, [ScanError]))] -> [ScanError] - gatherScanErrs = fold . mapMaybe (fmap snd . snd) - - gatherFileInfos - :: [(FilePath, Maybe (FileInfo, [ScanError]))] - -> [(FilePath, Maybe FileInfo)] - gatherFileInfos = map (second (fmap fst)) - - processFile :: FilePath -> IO $ Maybe (FileInfo, [ScanError]) - processFile file = do - let ext = takeExtension file - let mscanner = formatsSupport ext - forM mscanner ($ file) + gatherScanErrs = foldMap (snd . snd) + + gatherFileStatuses + :: [(FilePath, (FileStatus, [ScanError]))] + -> [(FilePath, FileStatus)] + gatherFileStatuses = map (second fst) + + processFile :: FilePath -> IO (FileStatus, [ScanError]) + processFile file = case mscanner file of + Nothing -> pure (NotScannable, []) + Just scanner -> scanner file <&> _1 %~ Scanned ----------------------------------------------------------- -- Yaml instances diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index eb884114..1d236de7 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -120,6 +120,7 @@ instance (Given ColorMode, Buildable a) => Buildable (WithReferenceLoc a) where data VerifyError = LocalFileDoesNotExist FilePath | LocalFileOutsideRepo FilePath + | LinkTargetNotAddedToGit FilePath | AnchorDoesNotExist Text [Anchor] | AmbiguousAnchorRef FilePath Text (NonEmpty Anchor) | ExternalResourceInvalidUri URIBS.URIParseError @@ -148,6 +149,14 @@ instance Given ColorMode => Buildable VerifyError where #{file} |] + + LinkTargetNotAddedToGit file -> + [int|| + ⛀ Link target is not tracked by Git: + #{file} + Please run "git add" before running xrefcheck or enable --include-untracked CLI option. + |] + AnchorDoesNotExist anchor similar -> case nonEmpty similar of Nothing -> [int|| @@ -339,10 +348,13 @@ verifyRepo (file, fileInfo) <- M.toList files guard . not $ matchesGlobPatterns root (ecIgnoreRefsFrom cExclusions) file case fileInfo of - Just fi -> do + Scanned fi -> do ref <- _fiReferences fi return (file, ref) - Nothing -> empty -- no support for such file, can do nothing + NotScannable -> empty -- No support for such file, can do nothing. + NotAddedToGit -> empty -- If this file is scannable, we've notified + -- user that we are scanning only files + -- added to Git while gathering RepoInfo. progressRef <- newIORef $ initVerifyProgress (map snd toScan) @@ -504,14 +516,15 @@ verifyReference checkRef mAnchor referredFile = verifying $ unless (isVirtual referredFile) do checkReferredFileIsInsideRepo referredFile - checkReferredFileExists referredFile - case lookupFilePath referredFile $ M.toList files of - Nothing -> pass -- no support for such file, can do nothing - Just referredFileInfo -> whenJust mAnchor $ + mFileStatus <- tryGetFileStatus referredFile + case mFileStatus of + Right (Scanned referredFileInfo) -> whenJust mAnchor $ checkAnchor referredFile (_fiAnchors referredFileInfo) - - lookupFilePath :: FilePath -> [(FilePath, Maybe FileInfo)] -> Maybe FileInfo - lookupFilePath fp = snd <=< find (equalFilePath (expandIndirections fp) . fst) + Right NotScannable -> pass -- no support for such file, can do nothing + Right NotAddedToGit -> throwError (LinkTargetNotAddedToGit referredFile) + Left UntrackedDirectory -> throwError (LinkTargetNotAddedToGit referredFile) + Left TrackedDirectory -> pass -- path leads to directory, currently + -- if such link contain anchor, we ignore it -- expands ".." and "." -- expandIndirections "a/b/../c" = "a/c" @@ -545,18 +558,21 @@ verifyReference nestingChange "." = 0 nestingChange _ = 1 - checkReferredFileExists file = do - unless (fileExists || dirExists) $ - throwError (LocalFileDoesNotExist file) + -- Returns `Nothing` when path corresponds to an existing (and tracked) directory + tryGetFileStatus :: FilePath -> ExceptT VerifyError IO (Either DirectoryStatus FileStatus) + tryGetFileStatus file + | Just f <- mFile = return $ Right f + | Just d <- mDir = return $ Left d + | otherwise = throwError (LocalFileDoesNotExist file) where matchesFilePath :: FilePath -> Bool matchesFilePath = equalFilePath $ expandIndirections file - fileExists :: Bool - fileExists = any matchesFilePath $ M.keys files + mFile :: Maybe FileStatus + mFile = (files M.!) <$> find matchesFilePath (M.keys files) - dirExists :: Bool - dirExists = any matchesFilePath dirs + mDir :: Maybe DirectoryStatus + mDir = (dirs M.!) <$> find matchesFilePath (M.keys dirs) checkAnchor file fileAnchors anchor = do checkAnchorReferenceAmbiguity file fileAnchors anchor diff --git a/tests/golden/check-git/check-git.bats b/tests/golden/check-git/check-git.bats index f1092be4..d3f62b95 100644 --- a/tests/golden/check-git/check-git.bats +++ b/tests/golden/check-git/check-git.bats @@ -17,7 +17,7 @@ load '../helpers' assert_output --partial "fatal: not a git repository" } -@test "Git: file not tracked" { +@test "Git: bad file not tracked" { cd $TEST_TEMP_DIR git init @@ -26,10 +26,19 @@ load '../helpers' run xrefcheck + assert_success + assert_output --partial "All repository links are valid." + + # this is printed to stderr + assert_output --partial - <> "git.md" + + touch ./a.md + + git add git.md + + to_temp xrefcheck + + assert_diff - < Date: Wed, 16 Nov 2022 00:52:43 +0200 Subject: [PATCH 2/2] [#200] Add `--include-untracked` CLI option Problem: xrefcheck checks only files that are tracked by Git, but sometimes we want to run xrefcheck on files without adding them to Git, e.g. when we want to test some generator of markdown files or when we actively create markdown files during development. Solution: add option to treat files that were neither added to git nor ignored as existing. --- CHANGES.md | 4 +++ src/Xrefcheck/CLI.hs | 7 +++- src/Xrefcheck/Command.hs | 2 +- src/Xrefcheck/Scan.hs | 18 ++++++---- tests/Test/Xrefcheck/IgnoreRegexSpec.hs | 2 +- tests/Test/Xrefcheck/TrailingSlashSpec.hs | 2 +- tests/golden/check-git/check-git.bats | 42 +++++++++++++++++++++++ 7 files changed, 67 insertions(+), 10 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 456f42d6..173f7a9f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -22,6 +22,10 @@ Unreleased `` instead of `` should be used to disable checking for links in file, so it's clearer that file itself is not ignored (and links can target it). +* [#215](https://github.com/serokell/xrefcheck/pull/215) + + Now we notify user when there are scannable files that were not added to Git + yet. Also added CLI option `--include-untracked` to scan such files and treat + as existing. 0.2.2 ========== diff --git a/src/Xrefcheck/CLI.hs b/src/Xrefcheck/CLI.hs index 48cef339..1aa809d1 100644 --- a/src/Xrefcheck/CLI.hs +++ b/src/Xrefcheck/CLI.hs @@ -81,6 +81,7 @@ data Options = Options , oColorMode :: ColorMode , oExclusionOptions :: ExclusionOptions , oNetworkingOptions :: NetworkingOptions + , oScanPolicy :: ScanPolicy } data ExclusionOptions = ExclusionOptions @@ -181,9 +182,13 @@ optionsParser = do ] oColorMode <- flag WithColors WithoutColors $ long "no-color" <> - help "Disable ANSI coloring of output" + help "Disable ANSI coloring of output." oExclusionOptions <- exclusionOptionsParser oNetworkingOptions <- networkingOptionsParser + oScanPolicy <- flag OnlyTracked IncludeUntracked $ + long "include-untracked" <> + help "Scan and treat as existing files that were not added to Git.\ + \ Files explicitly ignored by Git are always ignored by xrefcheck." return Options{..} exclusionOptionsParser :: Parser ExclusionOptions diff --git a/src/Xrefcheck/Command.hs b/src/Xrefcheck/Command.hs index adaf741b..16620a86 100644 --- a/src/Xrefcheck/Command.hs +++ b/src/Xrefcheck/Command.hs @@ -70,7 +70,7 @@ defaultAction Options{..} = do (ScanResult scanErrs repoInfo) <- allowRewrite showProgressBar $ \rw -> do let fullConfig = addExclusionOptions (cExclusions config) oExclusionOptions - scanRepo rw (formats $ cScanners config) fullConfig oRoot + scanRepo oScanPolicy rw (formats $ cScanners config) fullConfig oRoot when oVerbose $ fmt [int|| diff --git a/src/Xrefcheck/Scan.hs b/src/Xrefcheck/Scan.hs index 9f0d5d32..175d6f8a 100644 --- a/src/Xrefcheck/Scan.hs +++ b/src/Xrefcheck/Scan.hs @@ -189,19 +189,25 @@ readDirectoryWith mode config scanner root = scanRepo :: MonadIO m - => Rewrite -> FormatsSupport -> ExclusionConfig -> FilePath -> m ScanResult -scanRepo rw formatsSupport config root = do + => ScanPolicy -> Rewrite -> FormatsSupport -> ExclusionConfig -> FilePath -> m ScanResult +scanRepo scanMode rw formatsSupport config root = do putTextRewrite rw "Scanning repository..." when (not $ isDirectory root) $ die $ "Repository's root does not seem to be a directory: " <> root - (errs, processedFiles) <- liftIO + (errs, processedFiles) <- + let mode = case scanMode of + OnlyTracked -> RdmTracked + IncludeUntracked -> RdmBothTrackedAndUtracked + in liftIO $ (gatherScanErrs &&& gatherFileStatuses) - <$> readDirectoryWith RdmTracked config processFile root + <$> readDirectoryWith mode config processFile root - notProcessedFiles <- liftIO $ - readDirectoryWith RdmUntracked config (const $ pure NotAddedToGit) root + notProcessedFiles <- case scanMode of + OnlyTracked -> liftIO $ + readDirectoryWith RdmUntracked config (const $ pure NotAddedToGit) root + IncludeUntracked -> pure [] let scannableNotProcessedFiles = filter (isJust . mscanner . fst) notProcessedFiles diff --git a/tests/Test/Xrefcheck/IgnoreRegexSpec.hs b/tests/Test/Xrefcheck/IgnoreRegexSpec.hs index c206a062..c3143086 100644 --- a/tests/Test/Xrefcheck/IgnoreRegexSpec.hs +++ b/tests/Test/Xrefcheck/IgnoreRegexSpec.hs @@ -39,7 +39,7 @@ test_ignoreRegex = give WithoutColors $ in testGroup "Regular expressions performance" [ testCase "Check that only not matched links are verified" $ do scanResult <- allowRewrite showProgressBar $ \rw -> - scanRepo rw formats (config ^. cExclusionsL) root + scanRepo OnlyTracked rw formats (config ^. cExclusionsL) root verifyRes <- allowRewrite showProgressBar $ \rw -> verifyRepo rw config verifyMode root $ srRepoInfo scanResult diff --git a/tests/Test/Xrefcheck/TrailingSlashSpec.hs b/tests/Test/Xrefcheck/TrailingSlashSpec.hs index a16e2d95..8f0f0a1a 100644 --- a/tests/Test/Xrefcheck/TrailingSlashSpec.hs +++ b/tests/Test/Xrefcheck/TrailingSlashSpec.hs @@ -28,7 +28,7 @@ test_slash = testGroup "Trailing forward slash detection" $ root <> "\" should exist") $ do (ScanResult _ (RepoInfo repoInfo _)) <- allowRewrite False $ \rw -> - scanRepo rw format (cExclusions config & ecIgnoreL .~ []) root + scanRepo OnlyTracked rw format (cExclusions config & ecIgnoreL .~ []) root nonExistentFiles <- lefts <$> forM (keys repoInfo) (\filePath -> do predicate <- doesFileExist filePath return $ if predicate diff --git a/tests/golden/check-git/check-git.bats b/tests/golden/check-git/check-git.bats index d3f62b95..fb614e44 100644 --- a/tests/golden/check-git/check-git.bats +++ b/tests/golden/check-git/check-git.bats @@ -38,6 +38,31 @@ Please run "git add" before running xrefcheck or enable --include-untracked CLI EOF } +@test "Git: bad file not tracked, --include-untracked enabled, check failure" { + cd $TEST_TEMP_DIR + + git init + + echo "[a](./a.md)" >> "git.md" + + to_temp xrefcheck --include-untracked + + assert_diff - <> "git.md" + + touch ./a.md + + git add git.md + + run xrefcheck --include-untracked + + assert_success + + assert_output --partial "All repository links are valid." +}