From 8c12e81ca9d5fa57671531893eb26e8a0a54fe8d Mon Sep 17 00:00:00 2001 From: Yuri Romanowski Date: Wed, 14 Dec 2022 17:33:52 +0500 Subject: [PATCH 1/3] [#64] Implement copy/paste protection checks Problem: Currently xrefcheck is able to detect possibly bad copy-pastes, but there is no way to disable those checks locally for a file/paragraph/link. Solution: Add support for related annotations for `.md` files. --- CHANGES.md | 4 +- README.md | 16 ++ src/Xrefcheck/Core.hs | 25 +- src/Xrefcheck/Scan.hs | 13 +- src/Xrefcheck/Scanners/Markdown.hs | 252 +++++++++++++----- src/Xrefcheck/Verify.hs | 5 +- tests/Test/Xrefcheck/CopyPasteCheckSpec.hs | 37 ++- tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs | 83 ++++-- tests/Test/Xrefcheck/TooManyRequestsSpec.hs | 6 +- tests/Test/Xrefcheck/UtilRequests.hs | 2 +- tests/golden/check-copy-paste/expected.gold | 63 +++++ tests/golden/check-copy-paste/first-file.md | 7 + tests/golden/check-copy-paste/second-file.md | 46 ++++ tests/golden/check-scan-errors/expected.gold | 4 +- .../with-annotations/ignore_file_cpc.md | 19 ++ .../with-annotations/ignore_link_cpc.md | 49 ++++ .../with-annotations/ignore_paragraph_cpc.md | 16 ++ .../markdowns/with-annotations/no_link_cpc.md | 8 + .../with-annotations/no_paragraph_cpc.md | 9 + .../unexpected_ignore_file_cpc.md | 11 + 20 files changed, 557 insertions(+), 118 deletions(-) create mode 100644 tests/markdowns/with-annotations/ignore_file_cpc.md create mode 100644 tests/markdowns/with-annotations/ignore_link_cpc.md create mode 100644 tests/markdowns/with-annotations/ignore_paragraph_cpc.md create mode 100644 tests/markdowns/with-annotations/no_link_cpc.md create mode 100644 tests/markdowns/with-annotations/no_paragraph_cpc.md create mode 100644 tests/markdowns/with-annotations/unexpected_ignore_file_cpc.md diff --git a/CHANGES.md b/CHANGES.md index a4d037eb..5988548b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -36,11 +36,13 @@ Unreleased + Now we call references to anchors in current file (e.g. `[a](#b)`) as `file-local` references instead of calling them `current file` (which was ambiguous). * [#233](https://github.com/serokell/xrefcheck/pull/233) - + Now xrefxcheck does not follow redirect links by default. It fails for permanent + + Now xrefcheck does not follow redirect links by default. It fails for permanent redirect responses (i.e. 301 and 308) and passes for temporary ones (i.e. 302, 303, 307). * [#231](https://github.com/serokell/xrefcheck/pull/231) + Anchor analysis takes now into account the appropriate case-sensitivity depending on the configured Markdown flavour. +* [240](https://github.com/serokell/xrefcheck/pull/240) + + Now xrefcheck is able to detect possible copy-pastes relying on links and their names. 0.2.2 ========== diff --git a/README.md b/README.md index 73d76c56..9bfd5c35 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ Comparing to alternative solutions, this tool tries to achieve the following poi * Supports external links (`http`, `https`, `ftp` and `ftps`). * Detects broken and ambiguous anchors in local links. * Integration with GitHub Actions. +* Detects possible bad copy-pastes of links. ## Dependencies [↑](#xrefcheck) @@ -148,6 +149,21 @@ There are several ways to fix this: * By default, `xrefcheck` will ignore links to localhost. * This behavior can be disabled by removing the corresponding entry from the `ignoreExternalRefsTo` list in the config file. +1. How do I disable copy-paste check for specific links? + * Add a `` annotation before the link: + ```md + + Links with bad copypaste: + [good link](https://good.link.uri/). + [copypasted link](https://good.link.uri/). + ``` + ```md + A [good link](https://good.link.uri/) + followed by an [copypasted intentionally](https://good.link.uri/). + ``` + * You can use a `` annotation to disable copy-paste check in a paragraph. + * You can use a `` annotation to disable copy-paste check within an entire file. + ## Further work [↑](#xrefcheck) - [ ] Support link detection in different languages, not only Markdown. diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index 3863285f..3cb5b383 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -12,6 +12,7 @@ module Xrefcheck.Core where import Universum import Control.Lens (makeLenses) +import Control.Lens.Combinators (makeLensesWith) import Data.Aeson (FromJSON (..), withText) import Data.Char (isAlphaNum) import Data.Char qualified as C @@ -70,14 +71,17 @@ instance Given ColorMode => Buildable Position where -- | Full info about a reference. data Reference = Reference - { rName :: Text + { rName :: Text -- ^ Text displayed as reference. - , rLink :: Text + , rLink :: Text -- ^ File or site reference points to. - , rAnchor :: Maybe Text + , rAnchor :: Maybe Text -- ^ Section or custom anchor tag. - , rPos :: Position + , rPos :: Position + , rCheckCopyPaste :: Bool + -- ^ Whether to check bad copy/paste for this link } deriving stock (Show, Generic, Eq, Ord) +makeLensesWith postfixFields ''Reference -- | Context of anchor. data AnchorType @@ -102,9 +106,9 @@ data FileInfoDiff = FileInfoDiff } makeLenses ''FileInfoDiff -diffToFileInfo :: FileInfoDiff -> FileInfo -diffToFileInfo (FileInfoDiff refs anchors) = - FileInfo (DList.toList refs) (DList.toList anchors) +diffToFileInfo :: Bool -> FileInfoDiff -> FileInfo +diffToFileInfo ignoreCpcInFile (FileInfoDiff refs anchors) = + FileInfo (DList.toList refs) (DList.toList anchors) ignoreCpcInFile instance Semigroup FileInfoDiff where FileInfoDiff a b <> FileInfoDiff c d = FileInfoDiff (a <> c) (b <> d) @@ -114,13 +118,14 @@ instance Monoid FileInfoDiff where -- | All information regarding a single file we care about. data FileInfo = FileInfo - { _fiReferences :: [Reference] - , _fiAnchors :: [Anchor] + { _fiReferences :: [Reference] + , _fiAnchors :: [Anchor] + , _fiCopyPasteCheck :: Bool } deriving stock (Show, Generic) makeLenses ''FileInfo instance Default FileInfo where - def = diffToFileInfo mempty + def = diffToFileInfo True mempty data ScanPolicy = OnlyTracked diff --git a/src/Xrefcheck/Scan.hs b/src/Xrefcheck/Scan.hs index da312a0e..91270a14 100644 --- a/src/Xrefcheck/Scan.hs +++ b/src/Xrefcheck/Scan.hs @@ -117,18 +117,27 @@ data ScanErrorDescription = LinkErr | FileErr | ParagraphErr Text + | LinkErrCpc + | FileErrCpc + | ParagraphErrCpc Text | UnrecognisedErr Text deriving stock (Show, Eq) instance Buildable ScanErrorDescription where build = \case LinkErr -> [int||Expected a LINK after "ignore link" annotation|] + LinkErrCpc -> [int||Expected a LINK after "no duplication check in link" annotation|] FileErr -> [int||Annotation "ignore all" must be at the top of \ markdown or right after comments at the top|] + FileErrCpc -> [int||Annotation "no duplication check in file" must be at the top of \ + markdown or right after comments at the top|] ParagraphErr txt -> [int||Expected a PARAGRAPH after \ "ignore paragraph" annotation, but found #{txt}|] - UnrecognisedErr txt -> [int||Unrecognised option "#{txt}" perhaps you meant \ - <"ignore link"|"ignore paragraph"|"ignore all">|] + ParagraphErrCpc txt -> [int||Expected a PARAGRAPH after \ + "no duplication check in paragraph" annotation, but found #{txt}|] + UnrecognisedErr txt -> [int||Unrecognised option "#{txt}", perhaps you meant + <"ignore link"|"ignore paragraph"|"ignore all"> + or "no duplication check in "?|] specificFormatsSupport :: [([Extension], ScanAction)] -> FormatsSupport specificFormatsSupport formats = \ext -> M.lookup ext formatsMap diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs index ce684b39..a630f1c3 100644 --- a/src/Xrefcheck/Scanners/Markdown.hs +++ b/src/Xrefcheck/Scanners/Markdown.hs @@ -54,6 +54,24 @@ instance Buildable C.Node where build (C.Node _mpos ty mSubs) = nameF (show ty) $ maybe "[]" interpolateBlockListF (nonEmpty mSubs) +data Node a = Node + { _ndPos :: Maybe PosInfo + , _ndType :: NodeType + , _ndInfo :: a + , _ndSubs :: [Node a] + } + +instance Buildable (Node a) where + build (Node _mpos ty _info mSubs) = nameF (show ty) $ + maybe "[]" interpolateBlockListF (nonEmpty mSubs) + +-- Here and below CPC stands for "copy/paste check" +type NodeCPC = Node CopyPasteCheck + +newtype CopyPasteCheck = CopyPasteCheck + { cpcShouldCheck :: Bool + } deriving stock (Show, Eq, Generic) + toPosition :: Maybe PosInfo -> Position toPosition = Position . \case Nothing -> Nothing @@ -68,7 +86,7 @@ toPosition = Position . \case |] -- | Extract text from the topmost node. -nodeExtractText :: (C.Node) -> Text +nodeExtractText :: Node info -> Text nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten where extractText = \case @@ -76,8 +94,8 @@ nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten CODE t -> t _ -> "" - nodeFlatten :: (C.Node) -> [NodeType] - nodeFlatten (C.Node _pos ty subs) = ty : concatMap nodeFlatten subs + nodeFlatten :: Node info -> [NodeType] + nodeFlatten (Node _pos ty _info subs) = ty : concatMap nodeFlatten subs data IgnoreMode @@ -120,6 +138,7 @@ makeLensesFor [("_ignoreMode", "ignoreMode")] 'Ignore data Annotation = IgnoreAnnotation IgnoreMode + | IgnoreCopyPasteCheck IgnoreMode | InvalidAnnotation Text deriving stock (Eq) @@ -127,6 +146,8 @@ data Annotation data ScannerState = ScannerState { _ssIgnore :: Maybe Ignore + , _ssIgnoreCopyPasteCheck :: Maybe Ignore + , _ssParagraphExpectedAfterCpcAnnotation :: Bool , _ssParentNodeType :: Maybe NodeType -- ^ @cataNodeWithParentNodeInfo@ allows to get a @NodeType@ of parent node from this field } @@ -135,7 +156,9 @@ makeLenses ''ScannerState initialScannerState :: ScannerState initialScannerState = ScannerState { _ssIgnore = Nothing + , _ssIgnoreCopyPasteCheck = Nothing , _ssParentNodeType = Nothing + , _ssParagraphExpectedAfterCpcAnnotation = False } type ScannerM a = StateT ScannerState (Writer [ScanError]) a @@ -155,40 +178,49 @@ cataNodeWithParentNodeInfo f node = cataNode f' node map (ssParentNodeType .= Just ty >>) childScanners -- | Find ignore annotations (ignore paragraph and ignore link) --- and remove nodes that should be ignored. -processAnnotations :: FilePath -> C.Node -> Writer [ScanError] C.Node +-- and remove nodes that should be ignored; +-- find copy/paste check annotations (ignore for paragraph and for link) +-- and label nodes with a boolean meaning whether they should be +-- copy/paste checked. +processAnnotations :: FilePath -> C.Node -> Writer [ScanError] NodeCPC processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process where process :: Maybe PosInfo -> NodeType - -> [ScannerM C.Node] - -> ScannerM C.Node + -> [ScannerM NodeCPC] + -> ScannerM NodeCPC process pos ty subs = do let node = C.Node pos ty [] - use ssIgnore >>= \ign -> do + use ssIgnore >>= \ign -> + use ssIgnoreCopyPasteCheck >>= \ignCPC -> do -- When no `Ignore` state is set check next node for annotation, -- if found then set it as new `IgnoreMode` otherwise skip node. - let mbAnnotation = getAnnotation node - case mbAnnotation of + let shouldCheckCPC = CopyPasteCheck $ isNothing ignCPC + let traverseChildren = Node pos ty shouldCheckCPC <$> sequence subs + case getAnnotation node of Just ann -> handleAnnotation pos ty ann Nothing -> do case ty of - PARAGRAPH -> handleParagraph ign pos ty subs - LINK {} -> handleLink ign pos ty subs - IMAGE {} -> handleLink ign pos ty subs - _ -> handleOther ign pos ty subs + PARAGRAPH -> handleParagraph ign traverseChildren + LINK {} -> handleLink ign ty traverseChildren + IMAGE {} -> handleLink ign ty traverseChildren + _ -> handleOther ign ty traverseChildren handleLink :: Maybe Ignore -> - Maybe PosInfo -> NodeType -> - [ScannerM C.Node] -> - ScannerM C.Node - handleLink ign pos ty subs = do - let traverseChildren = C.Node pos ty <$> sequence subs - -- It can be checked that it's correct for all the cases + ScannerM NodeCPC -> + ScannerM NodeCPC + handleLink ign ty traverseChildren = do + -- It's common for all ignore states ssIgnore .= Nothing + -- If there was a copy/paste ignore annotation that expected link, + -- reset this state + resetCpcIgnoreIfLink + -- If right now there was a copy/paste ignore annotation for paragraph, + -- emit an error and reset these states. + reportExpectedParagraphAfterIgnoreCpcAnnotation ty case ign of Nothing -> traverseChildren @@ -200,73 +232,122 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process handleParagraph :: Maybe Ignore -> - Maybe PosInfo -> - NodeType -> - [ScannerM C.Node] -> - ScannerM C.Node - handleParagraph ign pos ty subs = do - let traverseChildren = C.Node pos ty <$> sequence subs + ScannerM NodeCPC -> + ScannerM NodeCPC + handleParagraph ign traverseChildren = do + -- If a new paragraph was expected (this stands for True), now we + -- don't expect paragraphs any more. + ssParagraphExpectedAfterCpcAnnotation .= False node <- case ign of - Nothing -> traverseChildren + Nothing -> + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren Just (Ignore IMSParagraph _) -> do ssIgnore .= Nothing pure defNode Just (Ignore (IMSLink ignoreLinkState) modePos) -> - traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs + wrapTraverseNodeWithLinkExpected ignoreLinkState modePos $ + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren + + ssIgnoreCopyPasteCheck .= Nothing use ssIgnore >>= \case - Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) -> + Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) -> do lift $ tell $ makeError pragmaPos fp LinkErr + ssIgnore .= Nothing + _ -> pass + use ssIgnoreCopyPasteCheck >>= \case + Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) -> do + lift $ tell $ makeError pragmaPos fp LinkErrCpc + ssIgnoreCopyPasteCheck .= Nothing _ -> pass + pure node handleOther :: Maybe Ignore -> - Maybe PosInfo -> NodeType -> - [ScannerM C.Node] -> - ScannerM C.Node - handleOther ign pos ty subs = do - let traverseChildren = C.Node pos ty <$> sequence subs + ScannerM NodeCPC -> + ScannerM NodeCPC + handleOther ign ty traverseChildren = do + -- If right now there was a copy/paste ignore annotation for paragraph, + -- emit an error and reset these states. + reportExpectedParagraphAfterIgnoreCpcAnnotation ty case ign of - Nothing -> traverseChildren + Nothing -> + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren Just (Ignore IMSParagraph modePos) -> do reportExpectedParagraphAfterIgnoreAnnotation modePos ty ssIgnore .= Nothing - traverseChildren - Just (Ignore (IMSLink ignoreLinkState) modePos) -> do - traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren + Just (Ignore (IMSLink ignoreLinkState) modePos) -> + wrapTraverseNodeWithLinkExpected ignoreLinkState modePos $ + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren reportExpectedParagraphAfterIgnoreAnnotation :: Maybe PosInfo -> NodeType -> ScannerM () reportExpectedParagraphAfterIgnoreAnnotation modePos ty = lift . tell . makeError modePos fp . ParagraphErr $ prettyType ty - traverseNodeWithLinkExpected :: + resetCpcIgnoreIfLink :: ScannerM () + resetCpcIgnoreIfLink = do + curCpcIgnore <- use ssIgnoreCopyPasteCheck + case _ignoreMode <$> curCpcIgnore of + Just (IMSLink _) -> ssIgnoreCopyPasteCheck .= Nothing + _ -> pass + + reportExpectedParagraphAfterIgnoreCpcAnnotation :: + NodeType -> ScannerM () + reportExpectedParagraphAfterIgnoreCpcAnnotation ty = + use ssIgnoreCopyPasteCheck >>= \case + Just (Ignore IMSParagraph modePos) -> + whenM (use ssParagraphExpectedAfterCpcAnnotation) $ do + lift . tell . makeError modePos fp . ParagraphErrCpc $ prettyType ty + ssParagraphExpectedAfterCpcAnnotation .= False + ssIgnoreCopyPasteCheck .= Nothing + _ -> pass + + wrapTraverseNodeWithLinkExpected :: IgnoreLinkState -> Maybe PosInfo -> - Maybe PosInfo -> - NodeType -> - [ScannerM C.Node] -> - ScannerM C.Node - traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs = do - when (ignoreLinkState == ExpectingLinkInSubnodes) $ - ssIgnore . _Just . ignoreMode .= IMSLink ParentExpectsLink - node' <- C.Node pos ty <$> sequence subs - when (ignoreLinkState == ExpectingLinkInSubnodes) $ do + ScannerM NodeCPC -> + ScannerM NodeCPC + wrapTraverseNodeWithLinkExpected ignoreLinkState modePos = + if ignoreLinkState /= ExpectingLinkInSubnodes + then id + else \traverse' -> do + ssIgnore . _Just . ignoreMode .= IMSLink ParentExpectsLink + node' <- traverse' currentIgnore <- use ssIgnore case currentIgnore of Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do lift $ tell $ makeError modePos fp LinkErr ssIgnore .= Nothing _ -> pass - return node' + return node' + + wrapTraverseNodeWithLinkExpectedForCpc :: + ScannerM NodeCPC -> + ScannerM NodeCPC + wrapTraverseNodeWithLinkExpectedForCpc traverse' = do + ignoreCpc <- use ssIgnoreCopyPasteCheck + case ignoreCpc of + Just (Ignore (IMSLink ExpectingLinkInSubnodes) modePos) -> do + ssIgnoreCopyPasteCheck . _Just . ignoreMode .= IMSLink ParentExpectsLink + node' <- traverse' + currentIgnore <- use ssIgnoreCopyPasteCheck + case currentIgnore of + Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do + lift $ tell $ makeError modePos fp LinkErrCpc + ssIgnoreCopyPasteCheck .= Nothing + _ -> pass + return node' + _ -> traverse' handleAnnotation :: Maybe PosInfo -> NodeType -> Annotation - -> ScannerM C.Node + -> ScannerM NodeCPC handleAnnotation pos nodeType = \case IgnoreAnnotation mode -> do let reportIfThereWasAnnotation :: ScannerM () @@ -300,6 +381,41 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process whenJust mbIgnoreModeState $ \ignoreModeState -> (ssIgnore .= Just (Ignore ignoreModeState correctPos)) pure defNode + IgnoreCopyPasteCheck mode -> do + mbIgnoreModeState <- case mode of + IMLink -> use ssParentNodeType <&> Just . IMSLink . \case + Just PARAGRAPH -> ExpectingLinkInParagraph + _ -> ExpectingLinkInSubnodes + + IMParagraph -> do + ssParagraphExpectedAfterCpcAnnotation .= True + pure $ Just IMSParagraph + + -- We don't expect to find an `ignore all` annotation here, + -- since that annotation should be at the top of the file and + -- any correct annotations should be handled in `checkGlobalAnnotations` + -- function. + IMAll -> do + lift . tell $ makeError correctPos fp FileErrCpc + pure Nothing + + whenJust mbIgnoreModeState $ \ignoreModeState -> do + let setupNewCpcState = ssIgnoreCopyPasteCheck .= Just (Ignore ignoreModeState correctPos) + use ssIgnoreCopyPasteCheck >>= \case + Nothing -> setupNewCpcState + Just (Ignore curIgn prevPos) + | IMSLink _ <- curIgn -> do + lift $ tell $ makeError prevPos fp LinkErrCpc + setupNewCpcState + | IMSParagraph <- curIgn -> case ignoreModeState of + IMSParagraph -> do + lift . tell . makeError prevPos fp . ParagraphErrCpc $ prettyType nodeType + setupNewCpcState + -- It's OK to have link annotation when paragraph is ignored + -- because in this case all links and all annotations are ignored. + _ -> pass + pure defNode + InvalidAnnotation msg -> do lift . tell $ makeError correctPos fp $ UnrecognisedErr msg pure defNode @@ -312,8 +428,8 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process in fromMaybe "" mType withIgnoreMode - :: ScannerM C.Node - -> Writer [ScanError] C.Node + :: ScannerM (Node info) + -> Writer [ScanError] (Node info) withIgnoreMode action = action `runStateT` initialScannerState >>= \case -- We expect `Ignore` state to be `Nothing` when we reach EOF, -- otherwise that means there was an annotation that didn't match @@ -328,8 +444,8 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process (node, _) -> pure node -- | Custom `foldMap` for source tree. -foldNode :: (Monoid a, Monad m) => (C.Node -> m a) -> C.Node -> m a -foldNode action node@(C.Node _ _ subs) = do +foldNode :: (Monoid a, Monad m) => (Node info -> m a) -> Node info -> m a +foldNode action node@(Node _ _ _ subs) = do a <- action node b <- concatForM subs (foldNode action) return (a <> b) @@ -342,16 +458,16 @@ nodeExtractInfo -> C.Node -> ExtractorM FileInfo nodeExtractInfo fp (C.Node nPos nTy nSubs) = do - let (ignoreFile, contentNodes) = checkGlobalAnnotations nSubs + let (ignoreFile, ignoreCpcInFile, contentNodes) = checkGlobalAnnotations nSubs if ignoreFile then return def - else diffToFileInfo <$> + else diffToFileInfo (not ignoreCpcInFile) <$> (lift (processAnnotations fp $ C.Node nPos nTy contentNodes) >>= foldNode extractor) where - extractor :: C.Node -> ExtractorM FileInfoDiff - extractor node@(C.Node pos ty _) = + extractor :: NodeCPC -> ExtractorM FileInfoDiff + extractor node@(Node pos ty info _) = case ty of HTML_BLOCK _ -> do return mempty @@ -401,15 +517,17 @@ nodeExtractInfo fp (C.Node nPos nTy nSubs) = do t : ts -> (t, Just $ T.intercalate "#" ts) [] -> error "impossible" return $ FileInfoDiff - (DList.singleton $ Reference {rName, rPos, rLink, rAnchor}) + (DList.singleton $ + Reference {rName, rPos, rLink, rAnchor, rCheckCopyPaste = cpcShouldCheck info}) DList.empty -- | Check for global annotations, ignoring simple comments if there are any. -checkGlobalAnnotations :: [C.Node] -> (Bool, [C.Node]) +checkGlobalAnnotations :: [C.Node] -> (Bool, Bool, [C.Node]) checkGlobalAnnotations nodes = do let (headerNodes, contentsNodes) = span isHeaderNode nodes ignoreFile = any isIgnoreFile headerNodes - (ignoreFile, contentsNodes) + ignoreCpcInFile = any isIgnoreCpcWithinFile headerNodes + (ignoreFile, ignoreCpcInFile, contentsNodes) where isSimpleComment :: C.Node -> Bool isSimpleComment node = do @@ -420,15 +538,20 @@ checkGlobalAnnotations nodes = do isIgnoreFile :: C.Node -> Bool isIgnoreFile = (Just (IgnoreAnnotation IMAll) ==) . getAnnotation + isIgnoreCpcWithinFile :: C.Node -> Bool + isIgnoreCpcWithinFile = (Just (IgnoreCopyPasteCheck IMAll) ==) . getAnnotation + isHeaderNode :: C.Node -> Bool isHeaderNode node = any ($ node) [ isSimpleComment , isIgnoreFile + , isIgnoreCpcWithinFile ] -defNode :: C.Node -defNode = C.Node Nothing DOCUMENT [] -- hard-coded default Node +-- | Hard-coded default Node +defNode :: NodeCPC +defNode = Node Nothing DOCUMENT (CopyPasteCheck False) [] makeError :: Maybe PosInfo @@ -473,6 +596,8 @@ textToMode :: Text -> Annotation textToMode annText = case wordsList of ("ignore" : [x]) | Just ignMode <- getIgnoreMode x -> IgnoreAnnotation ignMode + ("no" : "duplication" : "check" : "in" : [x]) + | Just ignMode <- getIgnoreMode x -> IgnoreCopyPasteCheck ignMode _ -> InvalidAnnotation annText where wordsList = words annText @@ -482,6 +607,7 @@ getIgnoreMode = \case "link" -> Just IMLink "paragraph" -> Just IMParagraph "all" -> Just IMAll + "file" -> Just IMAll _ -> Nothing parseFileInfo :: MarkdownConfig -> FilePath -> LT.Text -> (FileInfo, [ScanError]) diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index b7daca93..4faddb7e 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -409,7 +409,7 @@ verifyRepo -- user that we are scanning only files -- added to Git while gathering RepoInfo. - toCheckCopyPaste = map (second _fiReferences) filesToScan + toCheckCopyPaste = map (second _fiReferences) $ filter (_fiCopyPasteCheck . snd) filesToScan toScan = concatMap (\(file, fileInfo) -> map (file,) $ _fiReferences fileInfo) filesToScan copyPasteErrors = if scCopyPasteCheckEnabled cScanners then [ res @@ -460,7 +460,8 @@ checkCopyPaste file refs = do let getLinkAndAnchor x = (rLink x, rAnchor x) groupedRefs = L.groupBy ((==) `on` getLinkAndAnchor) $ - sortBy (compare `on` getLinkAndAnchor) refs + sortBy (compare `on` getLinkAndAnchor) $ + filter rCheckCopyPaste refs concatMap checkGroup groupedRefs where checkGroup :: [Reference] -> [CopyPasteCheckResult] diff --git a/tests/Test/Xrefcheck/CopyPasteCheckSpec.hs b/tests/Test/Xrefcheck/CopyPasteCheckSpec.hs index fbe09ba6..6db45402 100644 --- a/tests/Test/Xrefcheck/CopyPasteCheckSpec.hs +++ b/tests/Test/Xrefcheck/CopyPasteCheckSpec.hs @@ -26,12 +26,14 @@ test_copyPasteCheck = testGroup "Copypaste check" anchor = Just "heading" differentAnchor = Nothing defPos = Position Nothing - original1 = Reference "_- First - - File" link anchor defPos - original2 = Reference "_- First - fi - le" link anchor defPos - notCopied = Reference " Link 2 " link differentAnchor defPos - copied1 = Reference " foo bar" link anchor defPos - copied2 = Reference " Baz quux" link anchor defPos - input = [original1, original2, notCopied, copied1, copied2] + original1 = Reference "_- First - - File" link anchor defPos True + original2 = Reference "_- First - fi - le" link anchor defPos True + notCopied = Reference " Link 2 " link differentAnchor defPos True + copied1 = Reference " foo bar" link anchor defPos True + copied2 = Reference " Baz quux" link anchor defPos True + -- Do not report link with False + copied3 = Reference " Qib yse" link anchor defPos False + input = [original1, original2, notCopied, copied1, copied2, copied3] res = checkCopyPaste testPath input expectedRes = -- only first matching link is shown in the output @@ -43,19 +45,30 @@ test_copyPasteCheck = testGroup "Copypaste check" let link = "./first-file" anchor = Just "heading" defPos = Position Nothing - original1 = Reference "_Foo bar" link anchor defPos - original2 = Reference " Baz quux" link anchor defPos - original3 = Reference " Foo qubarx" link anchor defPos + original1 = Reference "_Foo bar" link anchor defPos True + original2 = Reference " Baz quux" link anchor defPos True + original3 = Reference " Foo qubarx" link anchor defPos True input = [original1, original2, original3] res = checkCopyPaste testPath input expectedRes = [] res @?= expectedRes - , testCase "Check external links" $ do + , testCase "Succeed if there is a link with a matching name but with check disabled" $ do + let link = "./first-file" + anchor = Just "heading" + defPos = Position Nothing + original1 = Reference "_-_first-fIlE-_-" link anchor defPos False + original2 = Reference " Baz quux" link anchor defPos True + original3 = Reference " Foo qubarx" link anchor defPos True + input = [original1, original2, original3] + res = checkCopyPaste testPath input + expectedRes = [] + res @?= expectedRes + , testCase "Report external links" $ do let link = "https://github.com" anchor = Nothing defPos = Position Nothing - original = Reference "github" link anchor defPos - copied = Reference "gitlab" link anchor defPos + original = Reference "github" link anchor defPos True + copied = Reference "gitlab" link anchor defPos True input = [original, copied] res = checkCopyPaste testPath input expectedRes = diff --git a/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs b/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs index c905bb5c..a2cb14a6 100644 --- a/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs +++ b/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs @@ -18,7 +18,8 @@ import Xrefcheck.Scanners.Markdown test_ignoreAnnotations :: [TestTree] test_ignoreAnnotations = - [ testGroup "Parsing failures" + [ testGroup "Parsing failures" $ + [ testGroup "Ignore annotations" [ testCase "Check if broken link annotation produce error" do let file = "tests/markdowns/with-annotations/no_link.md" errs <- getErrs file @@ -31,35 +32,71 @@ test_ignoreAnnotations = let file = "tests/markdowns/with-annotations/unexpected_ignore_file.md" errs <- getErrs file errs @?= makeError (Just $ PosInfo 9 1 9 29) file FileErr - , testCase "Check if broken unrecognised annotation produce error" do + ] + , testGroup "Ignore copypaste check annotations" + [ testCase "Check if broken copypaste link annotation produce error" do + let file = "tests/markdowns/with-annotations/no_link_cpc.md" + errs <- getErrs file + errs @?= makeError (Just $ PosInfo 7 1 7 48) file LinkErrCpc + , testCase "Check if broken copypaste paragraph annotation produce error" do + let file = "tests/markdowns/with-annotations/no_paragraph_cpc.md" + errs <- getErrs file + errs @?= makeError (Just $ PosInfo 7 1 7 53) file (ParagraphErrCpc "HEADING") + , testCase "Check if broken copypaste ignore file annotation produce error" do + let file = "tests/markdowns/with-annotations/unexpected_ignore_file_cpc.md" + errs <- getErrs file + errs @?= makeError (Just $ PosInfo 9 1 9 47) file FileErrCpc + ] + , testCase "Check if broken unrecognised annotation produce error" do let file = "tests/markdowns/with-annotations/unrecognised_option.md" errs <- getErrs file errs @?= makeError (Just $ PosInfo 7 1 7 46) file (UnrecognisedErr "ignore unrecognised-option") - ] - , testGroup "\"ignore link\" mode" - [ testCase "Check \"ignore link\" performance" $ do - let file = "tests/markdowns/with-annotations/ignore_link.md" - (fi, errs) <- parse GitHub file - getRefs fi @?= - ["team", "team", "team", "hire-us", "how-we-work", "privacy", "link2", "link2", "link3"] - errs @?= makeError (Just $ PosInfo 42 1 42 31) file LinkErr - ] - , testGroup "\"ignore paragraph\" mode" - [ testCase "Check \"ignore paragraph\" performance" $ do - (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_paragraph.md" - getRefs fi @?= ["blog", "contacts"] - errs @?= [] - ] - , testGroup "\"ignore all\" mode" - [ testCase "Check \"ignore all\" performance" $ do - (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_file.md" - getRefs fi @?= [] - errs @?= [] - ] + ] + , testGroup "Check ignore pragmas" $ + [ testGroup "\"ignore link\" mode" + [ testCase "Check \"ignore link\" performance" $ do + let file = "tests/markdowns/with-annotations/ignore_link.md" + (fi, errs) <- parse GitHub file + getRefs fi @?= + ["team", "team", "team", "hire-us", "how-we-work", "privacy", "link2", "link2", "link3"] + errs @?= makeError (Just $ PosInfo 42 1 42 31) file LinkErr + ] + , testGroup "\"ignore paragraph\" mode" + [ testCase "Check \"ignore paragraph\" performance" $ do + (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_paragraph.md" + getRefs fi @?= ["blog", "contacts"] + errs @?= [] + ] + , testGroup "\"ignore all\" mode" + [ testCase "Check \"ignore all\" performance" $ do + (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_file.md" + getRefs fi @?= [] + errs @?= [] + ] + ] + , testGroup "Check ignore copypaste check pragmas" $ + [ testCase "Check ignore duplication check for link pragmas" $ do + let file = "tests/markdowns/with-annotations/ignore_link_cpc.md" + (fi, errs) <- parse GitHub file + getRefsWithCpc fi @?= + ["team", "team", "team", "hire-us", "how-we-work", "privacy", "link2", "link2", "link3"] + errs @?= makeError (Just $ PosInfo 42 1 42 48) file LinkErrCpc + , testCase "Check ignore copypaste check for paragraph pragmas" $ do + (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_paragraph_cpc.md" + getRefsWithCpc fi @?= ["blog", "contacts"] + errs @?= [] + , testCase "Check ignore copypaste check in file performance" $ do + (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_file_cpc.md" + fi ^. fiCopyPasteCheck @?= False + errs @?= [] + ] ] where getRefs :: FileInfo -> [Text] getRefs fi = map rName $ fi ^. fiReferences + getRefsWithCpc :: FileInfo -> [Text] + getRefsWithCpc fi = map rName $ filter rCheckCopyPaste $ fi ^. fiReferences + getErrs :: FilePath -> IO [ScanError] getErrs path = snd <$> parse GitHub path diff --git a/tests/Test/Xrefcheck/TooManyRequestsSpec.hs b/tests/Test/Xrefcheck/TooManyRequestsSpec.hs index 8b07d490..a61ae50e 100644 --- a/tests/Test/Xrefcheck/TooManyRequestsSpec.hs +++ b/tests/Test/Xrefcheck/TooManyRequestsSpec.hs @@ -63,7 +63,7 @@ test_tooManyRequests = testGroup "429 response tests" } } _ <- verifyReferenceWithProgress - (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing)) + (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) False) progressRef Progress{..} <- vrExternal <$> readIORef progressRef let ttc = ttTimeToCompletion <$> pTaskTimestamp @@ -88,7 +88,7 @@ test_tooManyRequests = testGroup "429 response tests" } } _ <- verifyReferenceWithProgress - (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing)) + (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) False) progressRef Progress{..} <- vrExternal <$> readIORef progressRef let ttc = fromMaybe (sec 0) $ ttTimeToCompletion <$> pTaskTimestamp @@ -114,7 +114,7 @@ test_tooManyRequests = testGroup "429 response tests" } } _ <- verifyReferenceWithProgress - (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing)) + (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) False) progressRef Progress{..} <- vrExternal <$> readIORef progressRef let ttc = ttTimeToCompletion <$> pTaskTimestamp diff --git a/tests/Test/Xrefcheck/UtilRequests.hs b/tests/Test/Xrefcheck/UtilRequests.hs index 8582cd8c..1285a600 100644 --- a/tests/Test/Xrefcheck/UtilRequests.hs +++ b/tests/Test/Xrefcheck/UtilRequests.hs @@ -62,7 +62,7 @@ checkLinkAndProgressWithServer mock link progress vrExpectation = verifyLink :: Text -> IO (VerifyResult VerifyError, Progress Int) verifyLink link = do - let reference = Reference "" link Nothing (Position Nothing) + let reference = Reference "" link Nothing (Position Nothing) False progRef <- newIORef $ initVerifyProgress [reference] result <- verifyReferenceWithProgress reference progRef p <- readIORef progRef diff --git a/tests/golden/check-copy-paste/expected.gold b/tests/golden/check-copy-paste/expected.gold index 796b4c73..48ff78bf 100644 --- a/tests/golden/check-copy-paste/expected.gold +++ b/tests/golden/check-copy-paste/expected.gold @@ -1,48 +1,111 @@ +<<<<<<< HEAD === Possible copy/paste errors === ➥ In file second-file.md reference (relative) at src:13:1-29: +======= +=== Scan errors found === + + ➥ In file second-file.md + scan error at src:35:1-25: + + Unrecognised option "no dh", perhaps you meant + <"ignore link"|"ignore paragraph"|"ignore all"> + or "no duplication check in "? + + ➥ In file second-file.md + scan error at src:40:1-53: + + Expected a PARAGRAPH after "no duplication check in paragraph" annotation, but found HEADING + + ➥ In file second-file.md + scan error at src:46:1-48: + + Expected a LINK after "no duplication check in link" annotation + + ➥ In file second-file.md + scan error at src:51:1-48: + + Annotation "no duplication check in file" must be at the top of markdown or right after comments at the top + +Scan errors dumped, 4 in total. +=== Possible copy/paste errors === + + ➥ In file second-file.md + reference (relative) at src:20:1-29: +>>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "Lol Kek" - link: ./first-file.md - anchor: - is possibly a bad copy paste of +<<<<<<< HEAD reference (relative) at src:7:1-34: +======= + reference (relative) at src:10:1-34: +>>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "First file" - link: ./first-file.md - anchor: - ➥ In file second-file.md +<<<<<<< HEAD reference (relative) at src:14:1-30: +======= + reference (relative) at src:21:1-30: +>>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "Baz quux" - link: ./first-file.md - anchor: - is possibly a bad copy paste of +<<<<<<< HEAD reference (relative) at src:7:1-34: +======= + reference (relative) at src:10:1-34: +>>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "First file" - link: ./first-file.md - anchor: - ➥ In file second-file.md +<<<<<<< HEAD reference (relative) at src:24:1-29: +======= + reference (relative) at src:31:1-29: +>>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "fdw" - link: ./first-file.md - anchor: chor is possibly a bad copy paste of +<<<<<<< HEAD reference (relative) at src:23:1-32: +======= + reference (relative) at src:30:1-32: +>>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "ff-cho" - link: ./first-file.md - anchor: chor ➥ In file second-file.md +<<<<<<< HEAD reference (external) at src:29:1-28: +======= + reference (external) at src:70:1-28: +>>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "gitlab" - link: https://github.com - anchor: - is possibly a bad copy paste of +<<<<<<< HEAD reference (external) at src:28:1-28: +======= + reference (external) at src:69:1-28: +>>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "github" - link: https://github.com - anchor: - Possible copy/paste errors dumped, 4 in total. +<<<<<<< HEAD All repository links are valid. +======= +>>>>>>> 26d87b9... [#64] Implement copy/paste protection checks diff --git a/tests/golden/check-copy-paste/first-file.md b/tests/golden/check-copy-paste/first-file.md index 92b3319f..2b4cf38c 100644 --- a/tests/golden/check-copy-paste/first-file.md +++ b/tests/golden/check-copy-paste/first-file.md @@ -4,6 +4,13 @@ - SPDX-License-Identifier: MPL-2.0 --> + + + +[ Second - ---file- ](./second-file.md) +[ Link 2](./second-file.md) + # heading # anch diff --git a/tests/golden/check-copy-paste/second-file.md b/tests/golden/check-copy-paste/second-file.md index 02f4d554..43297e4a 100644 --- a/tests/golden/check-copy-paste/second-file.md +++ b/tests/golden/check-copy-paste/second-file.md @@ -4,8 +4,15 @@ - SPDX-License-Identifier: MPL-2.0 --> + + [ First file ](./first-file.md) + + +[ Link 2](./first-file.md) + [ Link 3](./first-file.md#heading) @@ -24,6 +31,45 @@ [ fdw](./first-file.md#chor) + + + + + + + + +# asd + + + + + +# asd + + + + + + + +[ Link 3](./first-file.md) + + + + +hello, how are you, bye + + + +[github](https://github.com) +[gitlab](https://github.com) + [github](https://github.com) [gitlab](https://github.com) + + + +[github](https://github.com) +[gitlab](https://github.com) diff --git a/tests/golden/check-scan-errors/expected.gold b/tests/golden/check-scan-errors/expected.gold index a1933ffe..ca734335 100644 --- a/tests/golden/check-scan-errors/expected.gold +++ b/tests/golden/check-scan-errors/expected.gold @@ -18,7 +18,9 @@ ➥ In file check-scan-errors.md scan error at src:21:1-50: - Unrecognised option "ignore unrecognised-annotation" perhaps you meant <"ignore link"|"ignore paragraph"|"ignore all"> + Unrecognised option "ignore unrecognised-annotation", perhaps you meant + <"ignore link"|"ignore paragraph"|"ignore all"> + or "no duplication check in "? ➥ In file check-second-file.md scan error at src:9:1-29: diff --git a/tests/markdowns/with-annotations/ignore_file_cpc.md b/tests/markdowns/with-annotations/ignore_file_cpc.md new file mode 100644 index 00000000..691eadac --- /dev/null +++ b/tests/markdowns/with-annotations/ignore_file_cpc.md @@ -0,0 +1,19 @@ + + + + + + + +Serokell [web-site](https://serokell.io/) +Serokell [team](https://serokell.io/team) + +Serokell [blog](https://serokell.io/blog) + +Serokell [labs](https://serokell.io/labs) + +Serokell [contacts](https://serokell.io/contacts) diff --git a/tests/markdowns/with-annotations/ignore_link_cpc.md b/tests/markdowns/with-annotations/ignore_link_cpc.md new file mode 100644 index 00000000..b4e6e231 --- /dev/null +++ b/tests/markdowns/with-annotations/ignore_link_cpc.md @@ -0,0 +1,49 @@ + + +### Do not check the first link in the paragraph + + +Serokell [web-site](https://serokell.io/) +Serokell [team](https://serokell.io/team) + + + +Serokell [blog](https://serokell.io/blog) + +Serokell [labs](https://serokell.io/labs) + +Serokell +[contacts](https://serokell.io/contacts) and again +[team](https://serokell.io/team) + +### Do not check not the first link in the paragraph + +[team](https://serokell.io/team) again and [projects](https://serokell.io/projects) + +Also [hire-us](https://serokell.io/hire-us) and +[fintech](https://serokell.io/fintech-development) +development + +Here are [how-we-work](https://serokell.io/how-we-work) and [privacy](https://serokell.io/privacy) +and [ml consulting](https://serokell.io/machine-learning-consulting) + + +Do not check link bug _regression test_ [link1](link1) [link2](link2) + + +Another no duplication check in link bug _some [link1](link1) emphasis_ [link2](link2) + +### Do not check pragma should be followed by + + + +This annotation expects link in paragraph right after it. + +So [link3](link3) is not checked for copypaste. + +Annotation inside paragraph allows +softbreaks and __other *things*__ in paragraph, so [link4](link4) is checked for copypaste. diff --git a/tests/markdowns/with-annotations/ignore_paragraph_cpc.md b/tests/markdowns/with-annotations/ignore_paragraph_cpc.md new file mode 100644 index 00000000..633cc96e --- /dev/null +++ b/tests/markdowns/with-annotations/ignore_paragraph_cpc.md @@ -0,0 +1,16 @@ + + + +Serokell [web-site](https://serokell.io/) +Serokell [team](https://serokell.io/team) + +Serokell [blog](https://serokell.io/blog) + + +Serokell [labs](https://serokell.io/labs) + +Serokell [contacts](https://serokell.io/contacts) diff --git a/tests/markdowns/with-annotations/no_link_cpc.md b/tests/markdowns/with-annotations/no_link_cpc.md new file mode 100644 index 00000000..e1671e52 --- /dev/null +++ b/tests/markdowns/with-annotations/no_link_cpc.md @@ -0,0 +1,8 @@ + + + +not a link diff --git a/tests/markdowns/with-annotations/no_paragraph_cpc.md b/tests/markdowns/with-annotations/no_paragraph_cpc.md new file mode 100644 index 00000000..03967d47 --- /dev/null +++ b/tests/markdowns/with-annotations/no_paragraph_cpc.md @@ -0,0 +1,9 @@ + + + + +# not a paragraph diff --git a/tests/markdowns/with-annotations/unexpected_ignore_file_cpc.md b/tests/markdowns/with-annotations/unexpected_ignore_file_cpc.md new file mode 100644 index 00000000..70981ebe --- /dev/null +++ b/tests/markdowns/with-annotations/unexpected_ignore_file_cpc.md @@ -0,0 +1,11 @@ + + +the first paragraph + + + +the second paragraph From 337bec6b5fc20f91e27dac8b5e92de37fa53d972 Mon Sep 17 00:00:00 2001 From: Yuri Romanowski Date: Fri, 16 Dec 2022 18:36:39 +0500 Subject: [PATCH 2/3] fixup! [#64] Implement copy/paste protection checks --- tests/golden/check-copy-paste/expected.gold | 39 --------------------- 1 file changed, 39 deletions(-) diff --git a/tests/golden/check-copy-paste/expected.gold b/tests/golden/check-copy-paste/expected.gold index 48ff78bf..1077dd7d 100644 --- a/tests/golden/check-copy-paste/expected.gold +++ b/tests/golden/check-copy-paste/expected.gold @@ -1,9 +1,3 @@ -<<<<<<< HEAD -=== Possible copy/paste errors === - - ➥ In file second-file.md - reference (relative) at src:13:1-29: -======= === Scan errors found === ➥ In file second-file.md @@ -33,79 +27,46 @@ Scan errors dumped, 4 in total. ➥ In file second-file.md reference (relative) at src:20:1-29: ->>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "Lol Kek" - link: ./first-file.md - anchor: - is possibly a bad copy paste of -<<<<<<< HEAD - reference (relative) at src:7:1-34: -======= reference (relative) at src:10:1-34: ->>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "First file" - link: ./first-file.md - anchor: - ➥ In file second-file.md -<<<<<<< HEAD - reference (relative) at src:14:1-30: -======= reference (relative) at src:21:1-30: ->>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "Baz quux" - link: ./first-file.md - anchor: - is possibly a bad copy paste of -<<<<<<< HEAD - reference (relative) at src:7:1-34: -======= reference (relative) at src:10:1-34: ->>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "First file" - link: ./first-file.md - anchor: - ➥ In file second-file.md -<<<<<<< HEAD - reference (relative) at src:24:1-29: -======= reference (relative) at src:31:1-29: ->>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "fdw" - link: ./first-file.md - anchor: chor is possibly a bad copy paste of -<<<<<<< HEAD - reference (relative) at src:23:1-32: -======= reference (relative) at src:30:1-32: ->>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "ff-cho" - link: ./first-file.md - anchor: chor ➥ In file second-file.md -<<<<<<< HEAD - reference (external) at src:29:1-28: -======= reference (external) at src:70:1-28: ->>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "gitlab" - link: https://github.com - anchor: - is possibly a bad copy paste of -<<<<<<< HEAD - reference (external) at src:28:1-28: -======= reference (external) at src:69:1-28: ->>>>>>> 26d87b9... [#64] Implement copy/paste protection checks - text: "github" - link: https://github.com - anchor: - Possible copy/paste errors dumped, 4 in total. -<<<<<<< HEAD -All repository links are valid. -======= ->>>>>>> 26d87b9... [#64] Implement copy/paste protection checks From 9a6aedca0c91a77648edb4b2ba638a2fa744c9a5 Mon Sep 17 00:00:00 2001 From: Yuri Romanowski Date: Fri, 16 Dec 2022 20:43:29 +0500 Subject: [PATCH 3/3] fixup! fixup! [#64] Implement copy/paste protection checks Review: style, typos, naming --- CHANGES.md | 2 +- README.md | 2 +- src/Xrefcheck/Core.hs | 4 +- src/Xrefcheck/Scan.hs | 2 +- src/Xrefcheck/Scanners/Markdown.hs | 49 ++++++++++---------- tests/golden/check-copy-paste/expected.gold | 2 +- tests/golden/check-scan-errors/expected.gold | 2 +- 7 files changed, 32 insertions(+), 31 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 5988548b..3663d2ae 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -41,7 +41,7 @@ Unreleased * [#231](https://github.com/serokell/xrefcheck/pull/231) + Anchor analysis takes now into account the appropriate case-sensitivity depending on the configured Markdown flavour. -* [240](https://github.com/serokell/xrefcheck/pull/240) +* [#240](https://github.com/serokell/xrefcheck/pull/240) + Now xrefcheck is able to detect possible copy-pastes relying on links and their names. 0.2.2 diff --git a/README.md b/README.md index 9bfd5c35..81c60f97 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ There are several ways to fix this: followed by an [copypasted intentionally](https://good.link.uri/). ``` * You can use a `` annotation to disable copy-paste check in a paragraph. - * You can use a `` annotation to disable copy-paste check within an entire file. + * You can use a `` annotation at the top of the file to disable copy-paste check within an entire file. ## Further work [↑](#xrefcheck) diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index 3cb5b383..e6ad91fd 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -107,8 +107,8 @@ data FileInfoDiff = FileInfoDiff makeLenses ''FileInfoDiff diffToFileInfo :: Bool -> FileInfoDiff -> FileInfo -diffToFileInfo ignoreCpcInFile (FileInfoDiff refs anchors) = - FileInfo (DList.toList refs) (DList.toList anchors) ignoreCpcInFile +diffToFileInfo cpcEnabledInFile (FileInfoDiff refs anchors) = + FileInfo (DList.toList refs) (DList.toList anchors) cpcEnabledInFile instance Semigroup FileInfoDiff where FileInfoDiff a b <> FileInfoDiff c d = FileInfoDiff (a <> c) (b <> d) diff --git a/src/Xrefcheck/Scan.hs b/src/Xrefcheck/Scan.hs index 91270a14..017e8606 100644 --- a/src/Xrefcheck/Scan.hs +++ b/src/Xrefcheck/Scan.hs @@ -136,7 +136,7 @@ instance Buildable ScanErrorDescription where ParagraphErrCpc txt -> [int||Expected a PARAGRAPH after \ "no duplication check in paragraph" annotation, but found #{txt}|] UnrecognisedErr txt -> [int||Unrecognised option "#{txt}", perhaps you meant - <"ignore link"|"ignore paragraph"|"ignore all"> + "ignore " or "no duplication check in "?|] specificFormatsSupport :: [([Extension], ScanAction)] -> FormatsSupport diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs index a630f1c3..0829417a 100644 --- a/src/Xrefcheck/Scanners/Markdown.hs +++ b/src/Xrefcheck/Scanners/Markdown.hs @@ -207,11 +207,11 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process IMAGE {} -> handleLink ign ty traverseChildren _ -> handleOther ign ty traverseChildren - handleLink :: - Maybe Ignore -> - NodeType -> - ScannerM NodeCPC -> - ScannerM NodeCPC + handleLink + :: Maybe Ignore + -> NodeType + -> ScannerM NodeCPC + -> ScannerM NodeCPC handleLink ign ty traverseChildren = do -- It's common for all ignore states ssIgnore .= Nothing @@ -230,10 +230,10 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process Just (Ignore (IMSLink _) _) -> do pure defNode - handleParagraph :: - Maybe Ignore -> - ScannerM NodeCPC -> - ScannerM NodeCPC + handleParagraph + :: Maybe Ignore + -> ScannerM NodeCPC + -> ScannerM NodeCPC handleParagraph ign traverseChildren = do -- If a new paragraph was expected (this stands for True), now we -- don't expect paragraphs any more. @@ -263,11 +263,11 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process pure node - handleOther :: - Maybe Ignore -> - NodeType -> - ScannerM NodeCPC -> - ScannerM NodeCPC + handleOther + :: Maybe Ignore + -> NodeType + -> ScannerM NodeCPC + -> ScannerM NodeCPC handleOther ign ty traverseChildren = do -- If right now there was a copy/paste ignore annotation for paragraph, -- emit an error and reset these states. @@ -295,8 +295,9 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process Just (IMSLink _) -> ssIgnoreCopyPasteCheck .= Nothing _ -> pass - reportExpectedParagraphAfterIgnoreCpcAnnotation :: - NodeType -> ScannerM () + reportExpectedParagraphAfterIgnoreCpcAnnotation + :: NodeType + -> ScannerM () reportExpectedParagraphAfterIgnoreCpcAnnotation ty = use ssIgnoreCopyPasteCheck >>= \case Just (Ignore IMSParagraph modePos) -> @@ -306,11 +307,11 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process ssIgnoreCopyPasteCheck .= Nothing _ -> pass - wrapTraverseNodeWithLinkExpected :: - IgnoreLinkState -> - Maybe PosInfo -> - ScannerM NodeCPC -> - ScannerM NodeCPC + wrapTraverseNodeWithLinkExpected + :: IgnoreLinkState + -> Maybe PosInfo + -> ScannerM NodeCPC + -> ScannerM NodeCPC wrapTraverseNodeWithLinkExpected ignoreLinkState modePos = if ignoreLinkState /= ExpectingLinkInSubnodes then id @@ -325,9 +326,9 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process _ -> pass return node' - wrapTraverseNodeWithLinkExpectedForCpc :: - ScannerM NodeCPC -> - ScannerM NodeCPC + wrapTraverseNodeWithLinkExpectedForCpc + :: ScannerM NodeCPC + -> ScannerM NodeCPC wrapTraverseNodeWithLinkExpectedForCpc traverse' = do ignoreCpc <- use ssIgnoreCopyPasteCheck case ignoreCpc of diff --git a/tests/golden/check-copy-paste/expected.gold b/tests/golden/check-copy-paste/expected.gold index 1077dd7d..b99f45ea 100644 --- a/tests/golden/check-copy-paste/expected.gold +++ b/tests/golden/check-copy-paste/expected.gold @@ -4,7 +4,7 @@ scan error at src:35:1-25: Unrecognised option "no dh", perhaps you meant - <"ignore link"|"ignore paragraph"|"ignore all"> + "ignore " or "no duplication check in "? ➥ In file second-file.md diff --git a/tests/golden/check-scan-errors/expected.gold b/tests/golden/check-scan-errors/expected.gold index ca734335..f6e58c5a 100644 --- a/tests/golden/check-scan-errors/expected.gold +++ b/tests/golden/check-scan-errors/expected.gold @@ -19,7 +19,7 @@ scan error at src:21:1-50: Unrecognised option "ignore unrecognised-annotation", perhaps you meant - <"ignore link"|"ignore paragraph"|"ignore all"> + "ignore " or "no duplication check in "? ➥ In file check-second-file.md