Skip to content

Commit

Permalink
[#244] Symlink scanner
Browse files Browse the repository at this point in the history
Problem: As GitHub and GitLab do not render symlinks as the file they
point to, we are considering to implement a new scanner for symlinks
that verifies them up to some extent.

Solution: A scanner that validates the reference from a symlink has been
implemented in the same style as the markdown scanner.
  • Loading branch information
aeqz committed Feb 1, 2023
1 parent 4a33023 commit 643adf9
Show file tree
Hide file tree
Showing 16 changed files with 223 additions and 73 deletions.
12 changes: 6 additions & 6 deletions src/Xrefcheck/Command.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@ import Xrefcheck.Config
import Xrefcheck.Core (Flavor (..))
import Xrefcheck.Progress (allowRewrite)
import Xrefcheck.Scan
(FormatsSupport, ScanError (..), ScanResult (..), reportScanErrs, scanRepo,
specificFormatsSupport)
import Xrefcheck.Scanners.Markdown (markdownSupport)
import Xrefcheck.Scanners.Symlink (symlinkSupport)
import Xrefcheck.System (askWithinCI)
import Xrefcheck.Util
import Xrefcheck.Verify (reportVerifyErrs, verifyErrors, verifyRepo)
Expand All @@ -34,9 +33,10 @@ readConfig path = fmap overrideConfig do
decodeFileEither path
>>= either (error . toText . prettyPrintParseException) pure

formats :: ScannersConfig -> FormatsSupport
formats ScannersConfig{..} = specificFormatsSupport
configuredFileSupport :: ScannersConfig -> FileSupport
configuredFileSupport ScannersConfig{..} = firstFileSupport
[ markdownSupport scMarkdown
, symlinkSupport
]

findFirstExistingFile :: [FilePath] -> IO (Maybe FilePath)
Expand Down Expand Up @@ -74,8 +74,8 @@ defaultAction Options{..} = do

(ScanResult scanErrs repoInfo) <- allowRewrite showProgressBar $ \rw -> do
let fullConfig = addExclusionOptions (cExclusions config) oExclusionOptions
formatsSupport = formats $ cScanners config
scanRepo oScanPolicy rw formatsSupport fullConfig oRoot
fileSupport = configuredFileSupport $ cScanners config
scanRepo oScanPolicy rw fileSupport fullConfig oRoot

when oVerbose $
fmt [int||
Expand Down
15 changes: 10 additions & 5 deletions src/Xrefcheck/Core.hs
Original file line number Diff line number Diff line change
Expand Up @@ -235,36 +235,41 @@ instance Given ColorMode => Buildable Reference where
case rifLink of
FLLocal ->
[int||
reference #{paren $ colorIfNeeded Green "file-local"} #{rPos}:
reference #{paren $ colorIfNeeded Green "file-local"}#{posSep}#{rPos}:
- text: #s{rName}
- anchor: #{rifAnchor ?: styleIfNeeded Faint "-"}
|]
FLRelative link ->
[int||
reference #{paren $ colorIfNeeded Yellow "relative"} #{rPos}:
reference #{paren $ colorIfNeeded Yellow "relative"}#{posSep}#{rPos}:
- text: #s{rName}
- link: #{link}
- anchor: #{rifAnchor ?: styleIfNeeded Faint "-"}
|]
FLAbsolute link ->
[int||
reference #{paren $ colorIfNeeded Yellow "absolute"} #{rPos}:
reference #{paren $ colorIfNeeded Yellow "absolute"}#{posSep}#{rPos}:
- text: #s{rName}
- link: /#{link}
- anchor: #{rifAnchor ?: styleIfNeeded Faint "-"}
|]
RIExternal (ELUrl url) ->
[int||
reference #{paren $ colorIfNeeded Red "external"} #{rPos}:
reference #{paren $ colorIfNeeded Red "external"}#{posSep}#{rPos}:
- text: #s{rName}
- link: #{url}
|]
RIExternal (ELOther url) ->
[int||
reference (other) #{rPos}:
reference (other)#{posSep}#{rPos}:
- text: #s{rName}
- link: #{url}
|]
where
posSep :: Text
posSep = case rPos of
Position Nothing -> ""
_ -> " "

instance Given ColorMode => Buildable AnchorType where
build = styleIfNeeded Faint . \case
Expand Down
49 changes: 24 additions & 25 deletions src/Xrefcheck/Scan.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,25 @@
module Xrefcheck.Scan
( ExclusionConfig
, ExclusionConfig' (..)
, Extension
, ScanAction
, FormatsSupport
, FileSupport
, ReadDirectoryMode(..)
, ScanAction
, ScanError (..)
, ScanErrorDescription (..)
, ScanResult (..)
, ScanStage (..)

, mkParseScanError
, mkGatherScanError
, scanRepo
, specificFormatsSupport
, defaultCompOption
, defaultExecOption
, ecIgnoreL
, ecIgnoreLocalRefsToL
, ecIgnoreRefsFromL
, ecIgnoreExternalRefsToL
, firstFileSupport
, mkGatherScanError
, mkParseScanError
, reportScanErrs
, scanRepo
) where

import Universum
Expand Down Expand Up @@ -70,11 +69,14 @@ makeLensesWith postfixFields ''ExclusionConfig'
-- | File extension, dot included.
type Extension = String

-- | Whether the file is a symlink.
type IsSymlink = Bool

-- | Way to parse a file.
type ScanAction = FilePath -> RelPosixLink -> IO (FileInfo, [ScanError 'Parse])

-- | All supported ways to parse a file.
type FormatsSupport = Extension -> Maybe ScanAction
type FileSupport = IsSymlink -> Extension -> Maybe ScanAction

data ScanResult = ScanResult
{ srScanErrors :: [ScanError 'Gather]
Expand Down Expand Up @@ -153,14 +155,9 @@ instance Buildable ScanErrorDescription where
UnrecognisedErr txt -> [int||Unrecognised option "#{txt}" perhaps you meant \
<"ignore link"|"ignore paragraph"|"ignore all">|]

specificFormatsSupport :: [([Extension], ScanAction)] -> FormatsSupport
specificFormatsSupport formats = \ext -> M.lookup ext formatsMap
where
formatsMap = M.fromList
[ (extension, parser)
| (extensions, parser) <- formats
, extension <- extensions
]
firstFileSupport :: [FileSupport] -> FileSupport
firstFileSupport fs isSymlink =
safeHead . catMaybes <$> traverse ($ isSymlink) fs

data ReadDirectoryMode
= RdmTracked
Expand Down Expand Up @@ -211,7 +208,7 @@ scanRepo
:: MonadIO m
=> ScanPolicy
-> Rewrite
-> FormatsSupport
-> FileSupport
-> ExclusionConfig
-> FilePath
-> m ScanResult
Expand All @@ -228,12 +225,13 @@ scanRepo scanMode rw formatsSupport config root = do
in liftIO $ (gatherScanErrs &&& gatherFileStatuses)
<$> readDirectoryWith mode config processFile root

notProcessedFiles <- case scanMode of
notProcessedFiles <- case scanMode of
OnlyTracked -> liftIO $
readDirectoryWith RdmUntracked config (const $ pure NotAddedToGit) root
IncludeUntracked -> pure []

let scannableNotProcessedFiles = filter (isJust . mscanner . fst) notProcessedFiles
scannableNotProcessedFiles <- liftIO $
filterM (fmap isJust . fileScanner . fst) notProcessedFiles

whenJust (nonEmpty $ map fst scannableNotProcessedFiles) $ \files -> hPutStrLn @Text stderr
[int|A|
Expand All @@ -252,8 +250,10 @@ scanRepo scanMode rw formatsSupport config root = do
<> fmap (, UntrackedDirectory) untrackedDirs)
}
where
mscanner :: RelPosixLink -> Maybe ScanAction
mscanner = formatsSupport . takeExtension
fileScanner :: RelPosixLink -> IO (Maybe ScanAction)
fileScanner file = do
isSymlink <- pathIsSymbolicLink (filePathFromRoot root file)
pure $ formatsSupport isSymlink $ takeExtension file

gatherScanErrs
:: [(RelPosixLink, (FileStatus, [ScanError 'Parse]))]
Expand All @@ -267,10 +267,9 @@ scanRepo scanMode rw formatsSupport config root = do
gatherFileStatuses = map (second fst)

processFile :: RelPosixLink -> IO (FileStatus, [ScanError 'Parse])
processFile file =
ifM (pathIsSymbolicLink (filePathFromRoot root file))
(pure (NotScannable, []))
case mscanner file of
processFile file = do
mScanner <- fileScanner file
case mScanner of
Nothing -> pure (NotScannable, [])
Just scanner -> scanner root file <&> _1 %~ Scanned

Expand Down
10 changes: 6 additions & 4 deletions src/Xrefcheck/Scanners/Markdown.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@

{-# OPTIONS_GHC -fno-warn-orphans #-}

-- | Markdown documents markdownScanner.

-- | Scanner for gathering references to verify from Markdown documents.
module Xrefcheck.Scanners.Markdown
( MarkdownConfig (..)

Expand Down Expand Up @@ -410,5 +409,8 @@ markdownScanner config root file =
parseFileInfo config . decodeUtf8
<$> BSL.readFile (filePathFromRoot root file)

markdownSupport :: MarkdownConfig -> ([Extension], ScanAction)
markdownSupport config = ([".md"], markdownScanner config)
markdownSupport :: MarkdownConfig -> FileSupport
markdownSupport config isSymlink extension = do
guard $ extension == ".md"
guard $ not isSymlink
pure $ markdownScanner config
36 changes: 36 additions & 0 deletions src/Xrefcheck/Scanners/Symlink.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{- SPDX-FileCopyrightText: 2023 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-}

-- | Scanner for gathering references to verify from symlinks.
--
-- A symlink's reference corresponds to the file it points to.
module Xrefcheck.Scanners.Symlink
( symlinkScanner
, symlinkSupport
) where

import Universum

import System.Directory (getSymbolicLinkTarget)

import Xrefcheck.Core
import Xrefcheck.Scan
import Xrefcheck.System

symlinkScanner :: ScanAction
symlinkScanner root path = do
rLink <- unRelPosixLink . mkRelPosixLink
<$> getSymbolicLinkTarget (filePathFromRoot root path)

let rName = "Symbolic Link"
rPos = Position Nothing
rInfo = referenceInfo rLink

pure (FileInfo [Reference {rName, rPos, rInfo}] [], [])

symlinkSupport :: FileSupport
symlinkSupport isSymlink _ = do
guard isSymlink
pure symlinkScanner
8 changes: 4 additions & 4 deletions tests/Test/Xrefcheck/IgnoreRegexSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ import Text.Regex.TDFA (Regex)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Progress (allowRewrite)
import Xrefcheck.Scan (ScanResult (..), ecIgnoreExternalRefsToL, scanRepo, specificFormatsSupport)
import Xrefcheck.Scan
import Xrefcheck.Scanners.Markdown
import Xrefcheck.Util (ColorMode (WithoutColors))
import Xrefcheck.Verify (VerifyError, VerifyResult, WithReferenceLoc (..), verifyErrors, verifyRepo)
import Xrefcheck.Verify

test_ignoreRegex :: TestTree
test_ignoreRegex = give WithoutColors $
let root = "tests/markdowns/without-annotations"
showProgressBar = False
formats = specificFormatsSupport [markdownSupport defGithubMdConfig]
fileSupport = firstFileSupport [markdownSupport defGithubMdConfig]
verifyMode = ExternalOnlyMode

linksTxt =
Expand All @@ -39,7 +39,7 @@ test_ignoreRegex = give WithoutColors $
in testGroup "Regular expressions performance"
[ testCase "Check that only not matched links are verified" $ do
scanResult <- allowRewrite showProgressBar $ \rw ->
scanRepo OnlyTracked rw formats (config ^. cExclusionsL) root
scanRepo OnlyTracked rw fileSupport (config ^. cExclusionsL) root

verifyRes <- allowRewrite showProgressBar $ \rw ->
verifyRepo rw config verifyMode $ srRepoInfo scanResult
Expand Down
4 changes: 2 additions & 2 deletions tests/Test/Xrefcheck/TrailingSlashSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ import Xrefcheck.Util
test_slash :: TestTree
test_slash = testGroup "Trailing forward slash detection" $
let config = defConfig GitHub
format = specificFormatsSupport [markdownSupport (scMarkdown (cScanners config))]
fileSupport = firstFileSupport [markdownSupport (scMarkdown (cScanners config))]
in roots <&> \root ->
testCase ("All the files within the root \"" <>
root <>
"\" should exist") $ do
(ScanResult _ RepoInfo{..}) <- allowRewrite False $ \rw ->
scanRepo OnlyTracked rw format (cExclusions config & ecIgnoreL .~ []) root
scanRepo OnlyTracked rw fileSupport (cExclusions config & ecIgnoreL .~ []) root
nonExistentFiles <- lefts <$> forM (fst . snd <$> toPairs riFiles) (\file -> do
predicate <- doesFileExist . filePathFromRoot root $ file
return $ if predicate
Expand Down
2 changes: 1 addition & 1 deletion tests/golden/check-autolinks/check-autolinks.bats
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ assert_diff - <<EOF
file-with-autolinks.md:
- references:
- reference (external) :
- reference (external):
- text: "https://www.google.com/doodles"
- link: https://www.google.com/doodles
- reference (external) at src:8:0-18:
Expand Down
35 changes: 33 additions & 2 deletions tests/golden/check-symlinks/check-symlinks.bats
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,39 @@ load '../helpers/bats-file/load'
load '../helpers'


@test "Checking that symlinks are not processed" {
@test "Checking that symlinks are not processed as md files" {
cp config-ignore.yaml $TEST_TEMP_DIR
cp expected1.gold $TEST_TEMP_DIR
cp -R dir $TEST_TEMP_DIR

cd $TEST_TEMP_DIR
touch dir/a
ln -s ../d.md outside.md
ln -s dir/b.md ok.md
ln -s dir/c.md broken.md

git init
git add ./*

to_temp xrefcheck -v -c config-ignore.yaml

assert_diff expected1.gold
}

@test "Symlinks validation" {
cp expected2.gold $TEST_TEMP_DIR
cp -R dir $TEST_TEMP_DIR

cd $TEST_TEMP_DIR
touch dir/a
ln -s ../d.md outside.md
ln -s dir/b.md ok.md
ln -s dir/c.md broken.md

git init
git add ./*

to_temp xrefcheck -v

assert_diff expected.gold
assert_diff expected2.gold
}
12 changes: 12 additions & 0 deletions tests/golden/check-symlinks/config-ignore.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# SPDX-FileCopyrightText: 2023 Serokell <https://serokell.io>
#
# SPDX-License-Identifier: Unlicense

exclusions:
ignore:
- broken.md
- outside.md

scanners:
markdown:
flavor: GitHub
Empty file removed tests/golden/check-symlinks/dir/a
Empty file.
4 changes: 1 addition & 3 deletions tests/golden/check-symlinks/dir/b.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,4 @@

[Empty file](a)

[Symlink ref](../s.md)

[Symlink ref with anchor](../s.md#a)
[Some symlink](../ok.md)
20 changes: 0 additions & 20 deletions tests/golden/check-symlinks/expected.gold

This file was deleted.

Loading

0 comments on commit 643adf9

Please sign in to comment.