Skip to content

Commit

Permalink
lib/advisories: add advisoryDetails field (and parse it)
Browse files Browse the repository at this point in the history
Add the `advisoryDetails :: Text` field, which reflects the OSV
`details` field.  It is intended to be CommonMark content.

When parsing, use a nasty hack to get the source range of the TOML
header, so that we can drop it and store the remainder of the input
as the `advisoryDetails`.
  • Loading branch information
frasertweedale authored and blackheaven committed Jun 20, 2023
1 parent ccd084b commit 5d63dd6
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 2 deletions.
1 change: 1 addition & 0 deletions code/hsec-tools/hsec-tools.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ library
toml-reader ^>= 0.1 || ^>= 0.2,
aeson >= 2,
pandoc-types >= 1.22 && < 2,
parsec >= 3 && < 4,
commonmark-pandoc >= 0.2 && < 0.3
hs-source-dirs: src
default-language: Haskell2010
Expand Down
3 changes: 3 additions & 0 deletions code/hsec-tools/src/Security/Advisories/Definition.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ data Advisory = Advisory
, advisoryPandoc :: Pandoc -- ^ Parsed document, without TOML front matter
, advisoryHtml :: Text
, advisorySummary :: Text
-- ^ A one-line, English textual summary of the vulnerability
, advisoryDetails :: Text
-- ^ Details of the vulnerability (CommonMark), without TOML front matter
}
deriving stock (Show)

Expand Down
67 changes: 65 additions & 2 deletions code/hsec-tools/src/Security/Advisories/Parse.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}

Expand All @@ -20,6 +22,7 @@ import Data.Foldable (toList)
import Data.Functor.Identity (Identity(Identity))
import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (fromMaybe)
import Data.Monoid (First(..))
import Data.Traversable (for)
import Data.Tuple (swap)
import GHC.Generics (Generic)
Expand All @@ -40,11 +43,13 @@ import Distribution.Types.VersionRange (VersionRange)

import Commonmark.Html (Html, renderHtml)
import qualified Commonmark.Parser as Commonmark
import Commonmark.Types (HasAttributes(..), IsBlock(..), IsInline(..), Rangeable(..), SourceRange(..))
import Commonmark.Pandoc (Cm(unCm))
import qualified TOML
import Text.Pandoc.Builder (Blocks, Many(..))
import Text.Pandoc.Definition (Block(..), Inline(..), Pandoc(..))
import Text.Pandoc.Walk (query)
import Text.Parsec.Pos (sourceLine)

import Security.Advisories.Definition
import Security.OSV (Reference(..), referenceTypes)
Expand Down Expand Up @@ -100,6 +105,24 @@ parseAdvisory policy attrs raw = do
!summary <- first MarkdownFormatError $ parseAdvisorySummary doc
table <- firstPretty TomlError TOML.renderTOMLError $ TOML.decode frontMatter

-- Re-parse as FirstSourceRange to find the source range of
-- the TOML header.
FirstSourceRange (First mRange) <-
firstPretty MarkdownError (T.pack . show) (Commonmark.commonmark "input" raw)
let
details = case mRange of
Just (SourceRange ((_,end):_)) ->
T.unlines
. dropWhile T.null
. fmap snd
. dropWhile ((< sourceLine end) . fst)
. zip [1..]
$ T.lines raw
_ ->
-- no block elements? empty range list?
-- these shouldn't happen, but better be total
raw

-- Re-parse input as HTML. This will probably go away; we now store the
-- Pandoc doc and can render that instead, where needed.
html <-
Expand All @@ -108,7 +131,7 @@ parseAdvisory policy attrs raw = do
(Commonmark.commonmark "input" raw :: Either Commonmark.ParseError (Html ()))

first (mkPretty AdvisoryError (T.pack . show)) $
parseAdvisoryTable attrs policy table doc summary html
parseAdvisoryTable attrs policy table doc summary details html

where
firstPretty
Expand All @@ -131,9 +154,10 @@ parseAdvisoryTable
-> TOML.Table
-> Pandoc -- ^ parsed document (without frontmatter)
-> T.Text -- ^ summary
-> T.Text -- ^ details
-> T.Text -- ^ rendered HTML
-> Either TableParseError Advisory
parseAdvisoryTable oob policy table doc summary html = runTableParser $ do
parseAdvisoryTable oob policy table doc summary details html = runTableParser $ do
hasNoKeysBut ["advisory", "affected", "versions", "references"] table
advisory <- mandatory table "advisory" isTable

Expand Down Expand Up @@ -198,6 +222,7 @@ parseAdvisoryTable oob policy table doc summary html = runTableParser $ do
, advisoryPandoc = doc
, advisoryHtml = html
, advisorySummary = summary
, advisoryDetails = details
}

advisoryDoc :: Blocks -> Either T.Text (T.Text, [Block])
Expand Down Expand Up @@ -435,3 +460,41 @@ describeValue TOML.OffsetDateTime {} = "date/time with offset"
describeValue TOML.LocalDateTime {} = "local date/time"
describeValue TOML.LocalDate {} = "local date"
describeValue TOML.LocalTime {} = "local time"

-- | A solution to an awkward problem: how to delete the TOML
-- block. We parse into this type to get the source range of
-- the first block element. We can use it to delete the lines
-- from the input.
--
newtype FirstSourceRange = FirstSourceRange (First SourceRange)
deriving (Show, Semigroup, Monoid)

instance Rangeable FirstSourceRange where
ranged range = (FirstSourceRange (First (Just range)) <>)

instance HasAttributes FirstSourceRange where
addAttributes _ = id

instance IsBlock FirstSourceRange FirstSourceRange where
paragraph _ = mempty
plain _ = mempty
thematicBreak = mempty
blockQuote _ = mempty
codeBlock _ = mempty
heading _ = mempty
rawBlock _ = mempty
referenceLinkDefinition _ = mempty
list _ = mempty

instance IsInline FirstSourceRange where
lineBreak = mempty
softBreak = mempty
str _ = mempty
entity _ = mempty
escapedChar _ = mempty
emph = id
strong = id
link _ _ _ = mempty
image _ _ _ = mempty
code _ = mempty
rawInline _ _ = mempty

0 comments on commit 5d63dd6

Please sign in to comment.