Skip to content

Commit

Permalink
[#170] Reorganize top-level config keys
Browse files Browse the repository at this point in the history
Problem: At the moment, the config yaml is organized in 3 top-level
keys: `traversal`, `verification` and `scanners`. However, the distinction
between the "traversal" and the "verification" stages is not relevant
to the user. This is entirely an internal concern.

Solution: Reorganize yaml config options under `exclusions`, `networking`
and `scanners`.
  • Loading branch information
Sereja313 committed Sep 28, 2022
1 parent 8b8d8ee commit 108f9d3
Show file tree
Hide file tree
Showing 22 changed files with 238 additions and 199 deletions.
4 changes: 3 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ Unreleased
* [#145](https://github.com/serokell/xrefcheck/pull/145)
+ Add check that there is no unknown fields in config.
* [#158](https://github.com/serokell/xrefcheck/pull/158)
+ Fixed bug when we reported footnotes as broken links
+ Fixed bug when we reported footnotes as broken links.
* [#163](https://github.com/serokell/xrefcheck/pull/163)
+ Fixed an issue where the progress bar thread might be unexpectedly cancelled and jumble up the output.
* [#175](https://github.com/serokell/xrefcheck/pull/175)
+ Reorganize top-level config keys.

0.2.1
==========
Expand Down
8 changes: 4 additions & 4 deletions ftp-tests/Test/Xrefcheck/FtpLinks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ import Test.Tasty (TestTree, askOption, testGroup)
import Test.Tasty.HUnit (assertBool, assertFailure, testCase, (@?=))
import Test.Tasty.Options as Tasty (IsOption (..), OptionDescription (Option), safeRead)

import Xrefcheck.Config
(Config' (cVerification), VerifyConfig, VerifyConfig' (vcIgnoreRefs), defConfig)
import Xrefcheck.Config (Config, cExclusionsL, defConfig)
import Xrefcheck.Core (Flavor (GitHub))
import Xrefcheck.Scan (ecIgnoreRefsL)
import Xrefcheck.Verify
(VerifyError (..), VerifyResult (VerifyResult), checkExternalResource, verifyErrors)

Expand All @@ -42,8 +42,8 @@ instance IsOption FtpHostOpt where
)


config :: VerifyConfig
config = (cVerification $ defConfig GitHub) { vcIgnoreRefs = [] }
config :: Config
config = defConfig GitHub & cExclusionsL . ecIgnoreRefsL .~ []

test_FtpLinks :: TestTree
test_FtpLinks = askOption $ \(FtpHostOpt host) -> do
Expand Down
69 changes: 35 additions & 34 deletions src/Xrefcheck/CLI.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@

module Xrefcheck.CLI
( VerifyMode (..)
, shouldCheckLocal
, shouldCheckExternal
, ExclusionOptions (..)
, Command (..)
, Options (..)
, VerifyOptions (..)
, addVerifyOptions
, TraversalOptions (..)
, addTraversalOptions
, NetworkingOptions (..)

, addNetworkingOptions
, shouldCheckLocal
, shouldCheckExternal
, addExclusionOptions
, defaultConfigPaths
, getCommand
) where
Expand All @@ -33,7 +34,7 @@ import Options.Applicative.Help.Pretty (Doc, displayS, fill, fillSep, indent, re
import Options.Applicative.Help.Pretty qualified as Pretty

import Paths_xrefcheck (version)
import Xrefcheck.Config (VerifyConfig, VerifyConfig' (..))
import Xrefcheck.Config (NetworkingConfig, NetworkingConfig' (..))
import Xrefcheck.Core
import Xrefcheck.Scan
import Xrefcheck.System (RelGlobPattern (..))
Expand Down Expand Up @@ -69,34 +70,34 @@ data Command
| DumpConfig Flavor FilePath

data Options = Options
{ oConfigPath :: Maybe FilePath
, oRoot :: FilePath
, oMode :: VerifyMode
, oVerbose :: Bool
, oShowProgressBar :: Maybe Bool
, oTraversalOptions :: TraversalOptions
, oVerifyOptions :: VerifyOptions
{ oConfigPath :: Maybe FilePath
, oRoot :: FilePath
, oMode :: VerifyMode
, oVerbose :: Bool
, oShowProgressBar :: Maybe Bool
, oExclusionOptions :: ExclusionOptions
, oNetworkingOptions :: NetworkingOptions
}

data TraversalOptions = TraversalOptions
data ExclusionOptions = ExclusionOptions
{ toIgnored :: [RelGlobPattern]
}

addTraversalOptions :: TraversalConfig -> TraversalOptions -> TraversalConfig
addTraversalOptions TraversalConfig{..} (TraversalOptions ignored) =
TraversalConfig
{ tcIgnored = tcIgnored ++ ignored
addExclusionOptions :: ExclusionConfig -> ExclusionOptions -> ExclusionConfig
addExclusionOptions ExclusionConfig{..} (ExclusionOptions ignored) =
ExclusionConfig
{ ecIgnored = ecIgnored ++ ignored
, ..
}

data VerifyOptions = VerifyOptions
{ voMaxRetries :: Maybe Int
data NetworkingOptions = NetworkingOptions
{ noMaxRetries :: Maybe Int
}

addVerifyOptions :: VerifyConfig -> VerifyOptions -> VerifyConfig
addVerifyOptions VerifyConfig{..} (VerifyOptions maxRetries) =
VerifyConfig
{ vcMaxRetries = fromMaybe vcMaxRetries maxRetries
addNetworkingOptions :: NetworkingConfig -> NetworkingOptions -> NetworkingConfig
addNetworkingOptions NetworkingConfig{..} (NetworkingOptions maxRetries) =
NetworkingConfig
{ ncMaxRetries = fromMaybe ncMaxRetries maxRetries
, ..
}

Expand Down Expand Up @@ -170,29 +171,29 @@ optionsParser = do
help "Do not display progress bar during verification."
, pure Nothing
]
oTraversalOptions <- traversalOptionsParser
oVerifyOptions <- verifyOptionsParser
oExclusionOptions <- exclusionOptionsParser
oNetworkingOptions <- networkingOptionsParser
return Options{..}

traversalOptionsParser :: Parser TraversalOptions
traversalOptionsParser = do
exclusionOptionsParser :: Parser ExclusionOptions
exclusionOptionsParser = do
toIgnored <- many . globOption $
long "ignored" <>
metavar "GLOB PATTERN" <>
help "Files which we pretend do not exist.\
\ Glob patterns that contain wildcards MUST be enclosed\
\ in quotes to avoid being expanded by shell."
return TraversalOptions{..}
return ExclusionOptions{..}

verifyOptionsParser :: Parser VerifyOptions
verifyOptionsParser = do
voMaxRetries <- option (Just <$> auto) $
networkingOptionsParser :: Parser NetworkingOptions
networkingOptionsParser = do
noMaxRetries <- option (Just <$> auto) $
long "retries" <>
metavar "INT" <>
value Nothing <>
help "How many attempts to retry an external link after getting \
\a \"429 Too Many Requests\" response."
return VerifyOptions{..}
return NetworkingOptions{..}

dumpConfigOptions :: Parser Command
dumpConfigOptions = hsubparser $
Expand Down
7 changes: 4 additions & 3 deletions src/Xrefcheck/Command.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import Data.Yaml (decodeFileEither, prettyPrintParseException)
import Fmt (blockListF', build, fmt, fmtLn, indentF)
import System.Directory (doesFileExist)

import Xrefcheck.CLI (Options (..), addTraversalOptions, addVerifyOptions, defaultConfigPaths)
import Xrefcheck.CLI (Options (..), addExclusionOptions, addNetworkingOptions, defaultConfigPaths)
import Xrefcheck.Config
(Config, Config' (..), ScannersConfig, ScannersConfig' (..), defConfig, normaliseConfigFilePaths,
overrideConfig)
Expand Down Expand Up @@ -60,7 +60,7 @@ defaultAction Options{..} = do
let showProgressBar = oShowProgressBar ?: not withinCI

(ScanResult scanErrs repoInfo) <- allowRewrite showProgressBar $ \rw -> do
let fullConfig = addTraversalOptions (cTraversal config) oTraversalOptions
let fullConfig = addExclusionOptions (cExclusions config) oExclusionOptions
scanRepo rw (formats $ cScanners config) fullConfig oRoot

when oVerbose $
Expand All @@ -69,7 +69,8 @@ defaultAction Options{..} = do
unless (null scanErrs) . reportScanErrs $ sortBy (compare `on` seFile) scanErrs

verifyRes <- allowRewrite showProgressBar $ \rw -> do
let fullConfig = addVerifyOptions (cVerification config) oVerifyOptions
let fullConfig = config
{ cNetworking = addNetworkingOptions (cNetworking config) oNetworkingOptions }
verifyRepo rw fullConfig oMode oRoot repoInfo

case verifyErrors verifyRes of
Expand Down
129 changes: 54 additions & 75 deletions src/Xrefcheck/Config.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,13 @@ import Data.Yaml (FromJSON (..), decodeEither', prettyPrintParseException, withT
import Instances.TH.Lift ()
import Text.Regex.TDFA qualified as R
import Text.Regex.TDFA.ByteString ()
import Text.Regex.TDFA.Common
import Text.Regex.TDFA.Text qualified as R

import Time (KnownRatName, Second, Time (..), unitsP)

import Xrefcheck.Config.Default
import Xrefcheck.Core
import Xrefcheck.Scan
import Xrefcheck.Scanners.Markdown
import Xrefcheck.System (RelGlobPattern, normaliseGlobPattern)
import Xrefcheck.Util (Field, aesonConfigOption, postfixFields, (-:))

-- | Type alias for Config' with all required fields.
Expand All @@ -40,58 +37,50 @@ type ConfigOptional = Config' Maybe

-- | Overall config.
data Config' f = Config
{ cTraversal :: Field f (TraversalConfig' f)
, cVerification :: Field f (VerifyConfig' f)
, cScanners :: Field f (ScannersConfig' f)
{ cExclusions :: Field f (ExclusionConfig' f)
, cNetworking :: Field f (NetworkingConfig' f)
, cScanners :: Field f (ScannersConfig' f)
} deriving stock (Generic)

normaliseConfigFilePaths :: Config -> Config
normaliseConfigFilePaths Config{..}
= Config
{ cTraversal = normaliseTraversalConfigFilePaths cTraversal
, cVerification = normaliseVerifyConfigFilePaths cVerification
, cScanners
{ cExclusions = normaliseExclusionConfigFilePaths cExclusions
, ..
}

-- | Type alias for VerifyConfig' with all required fields.
type VerifyConfig = VerifyConfig' Identity

-- | Config of verification.
data VerifyConfig' f = VerifyConfig
{ vcAnchorSimilarityThreshold :: Field f Double
, vcExternalRefCheckTimeout :: Field f (Time Second)
, vcVirtualFiles :: Field f [RelGlobPattern]
-- ^ Files which we pretend do exist.
, vcNotScanned :: Field f [RelGlobPattern]
-- ^ Files, references in which we should not analyze.
, vcIgnoreRefs :: Field f [Regex]
-- ^ Regular expressions that match external references we should not verify.
, vcIgnoreAuthFailures :: Field f Bool
-- | Type alias for NetworkingConfig' with all required fields.
type NetworkingConfig = NetworkingConfig' Identity

-- | Config of networking.
data NetworkingConfig' f = NetworkingConfig
{ ncExternalRefCheckTimeout :: Field f (Time Second)
-- ^ When checking external references, how long to wait on request before
-- declaring "Response timeout".
, ncIgnoreAuthFailures :: Field f Bool
-- ^ If True - links which return 403 or 401 code will be skipped,
-- otherwise – will be marked as broken, because we can't check it.
, vcDefaultRetryAfter :: Field f (Time Second)
, ncDefaultRetryAfter :: Field f (Time Second)
-- ^ Default Retry-After delay, applicable when we receive a 429 response
-- and it does not contain a @Retry-After@ header.
, vcMaxRetries :: Field f Int
, ncMaxRetries :: Field f Int
-- ^ How many attempts to retry an external link after getting
-- a "429 Too Many Requests" response.
} deriving stock (Generic)

normaliseVerifyConfigFilePaths :: VerifyConfig -> VerifyConfig
normaliseVerifyConfigFilePaths vc@VerifyConfig{ vcVirtualFiles, vcNotScanned}
= vc
{ vcVirtualFiles = map normaliseGlobPattern vcVirtualFiles
, vcNotScanned = map normaliseGlobPattern vcNotScanned
}

-- | Type alias for ScannersConfig' with all required fields.
type ScannersConfig = ScannersConfig' Identity

-- | Configs for all the supported scanners.
data ScannersConfig' f = ScannersConfig
{ scMarkdown :: Field f (MarkdownConfig' f)
, scAnchorSimilarityThreshold :: Field f Double
-- ^ On 'anchor not found' error, how much similar anchors should be displayed as
-- hint. Number should be between 0 and 1, larger value means stricter filter.
} deriving stock (Generic)

makeLensesWith postfixFields ''Config'
makeLensesWith postfixFields ''VerifyConfig'
makeLensesWith postfixFields ''NetworkingConfig'

-- | Picks raw config with @:PLACEHOLDER:<key>:@ and fills the specified fields
-- in it, picking a replacement suitable for the given key. Only strings and lists
Expand Down Expand Up @@ -209,34 +198,44 @@ defConfig flavor = normaliseConfigFilePaths $
overrideConfig :: ConfigOptional -> Config
overrideConfig config
= Config
{ cTraversal = TraversalConfig ignored
, cVerification = maybe defVerification overrideVerify $ cVerification config
, cScanners = ScannersConfig (MarkdownConfig flavor)
{ cExclusions = maybe defExclusions overrideExclusions $ cExclusions config
, cNetworking = maybe defNetworking overrideNetworking $ cNetworking config
, cScanners = ScannersConfig
{ scMarkdown = MarkdownConfig flavor
, scAnchorSimilarityThreshold =
fromMaybe (scAnchorSimilarityThreshold defScanners)
$ scAnchorSimilarityThreshold =<< cScanners config
}
}
where
flavor = fromMaybe GitHub
$ mcFlavor =<< scMarkdown =<< cScanners config

defTraversal = cTraversal $ defConfig flavor

ignored = fromMaybe (tcIgnored defTraversal) $ tcIgnored =<< cTraversal config
defScanners = cScanners $ defConfig flavor
defExclusions = cExclusions $ defConfig flavor
defNetworking = cNetworking $ defConfig flavor

defVerification = cVerification $ defConfig flavor

overrideVerify verifyConfig
= VerifyConfig
{ vcAnchorSimilarityThreshold = overrideField vcAnchorSimilarityThreshold
, vcExternalRefCheckTimeout = overrideField vcExternalRefCheckTimeout
, vcVirtualFiles = overrideField vcVirtualFiles
, vcNotScanned = overrideField vcNotScanned
, vcIgnoreRefs = overrideField vcIgnoreRefs
, vcIgnoreAuthFailures = overrideField vcIgnoreAuthFailures
, vcDefaultRetryAfter = overrideField vcDefaultRetryAfter
, vcMaxRetries = overrideField vcMaxRetries
overrideExclusions exclusionConfig
= ExclusionConfig
{ ecIgnored = overrideField ecIgnored
, ecVirtualFiles = overrideField ecVirtualFiles
, ecNotScanned = overrideField ecNotScanned
, ecIgnoreRefs = overrideField ecIgnoreRefs
}
where
overrideField :: (forall f. ExclusionConfig' f -> Field f a) -> a
overrideField field = fromMaybe (field defExclusions) $ field exclusionConfig

overrideNetworking networkingConfig
= NetworkingConfig
{ ncExternalRefCheckTimeout = overrideField ncExternalRefCheckTimeout
, ncIgnoreAuthFailures = overrideField ncIgnoreAuthFailures
, ncDefaultRetryAfter = overrideField ncDefaultRetryAfter
, ncMaxRetries = overrideField ncMaxRetries
}
where
overrideField :: (forall f. VerifyConfig' f -> Field f a) -> a
overrideField field = fromMaybe (field defVerification) $ field verifyConfig
overrideField :: (forall f. NetworkingConfig' f -> Field f a) -> a
overrideField field = fromMaybe (field defNetworking) $ field networkingConfig

-----------------------------------------------------------
-- Yaml instances
Expand All @@ -246,36 +245,16 @@ instance KnownRatName unit => FromJSON (Time unit) where
parseJSON = withText "time" $
maybe (fail "Unknown time") pure . unitsP . toString

instance FromJSON Regex where
parseJSON = withText "regex" $ \val -> do
let errOrRegex = R.compile defaultCompOption defaultExecOption val
either (error . show) return errOrRegex

-- Default boolean values according to
-- https://hackage.haskell.org/package/regex-tdfa-1.3.1.0/docs/Text-Regex-TDFA.html#t:CompOption
defaultCompOption :: CompOption
defaultCompOption = CompOption
{ caseSensitive = True
, multiline = True
, rightAssoc = True
, newSyntax = True
, lastStarGreedy = False
}

-- ExecOption value to improve speed
defaultExecOption :: ExecOption
defaultExecOption = ExecOption {captureGroups = False}

instance FromJSON (ConfigOptional) where
parseJSON = genericParseJSON aesonConfigOption

instance FromJSON (Config) where
parseJSON = genericParseJSON aesonConfigOption

instance FromJSON (VerifyConfig' Maybe) where
instance FromJSON (NetworkingConfig' Maybe) where
parseJSON = genericParseJSON aesonConfigOption

instance FromJSON (VerifyConfig) where
instance FromJSON (NetworkingConfig) where
parseJSON = genericParseJSON aesonConfigOption

instance FromJSON (ScannersConfig' Maybe) where
Expand Down
Loading

0 comments on commit 108f9d3

Please sign in to comment.