Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add doxygen comment parser. #68

Merged
merged 1 commit into from
Mar 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ haskell_library(
name = "lexer",
srcs = [
"src/Language/Cimple/Lexer.hs",
"src/Language/Cimple/ParseResult.hs",
"src/Language/Cimple/Tokens.hs",
],
src_strip_prefix = "src",
Expand All @@ -33,7 +34,6 @@ haskell_library(
src_strip_prefix = "src",
visibility = ["//hs-cimple:__subpackages__"],
deps = [
":lexer",
hazel_library("aeson"),
hazel_library("base"),
hazel_library("data-fix"),
Expand Down Expand Up @@ -81,6 +81,28 @@ haskell_library(
],
)

happy_parser(
name = "CommentParser",
src = "src/Language/Cimple/CommentParser.y",
preproc = "expand_yacc.pl",
)

haskell_library(
name = "comment-parser",
srcs = [":CommentParser"],
src_strip_prefix = "src",
visibility = ["//hs-cimple:__subpackages__"],
deps = [
":ast",
":describe-ast",
":lexer",
hazel_library("array"),
hazel_library("base"),
hazel_library("data-fix"),
hazel_library("text"),
],
)

happy_parser(
name = "TreeParser",
src = "src/Language/Cimple/TreeParser.y",
Expand All @@ -94,6 +116,7 @@ haskell_library(
visibility = ["//hs-cimple:__subpackages__"],
deps = [
":ast",
":comment-parser",
":describe-ast",
":lexer",
hazel_library("array"),
Expand All @@ -111,6 +134,7 @@ haskell_library(
"src/Language/Cimple/Ast.hs",
"src/Language/Cimple/DescribeAst.hs",
"src/Language/Cimple/Flatten.hs",
"src/Language/Cimple/ParseResult.hs",
"src/Language/Cimple/Tokens.hs",
],
),
Expand Down
2 changes: 2 additions & 0 deletions cimple.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ library
other-modules:
Language.Cimple.Annot
, Language.Cimple.Ast
, Language.Cimple.CommentParser
, Language.Cimple.DescribeAst
, Language.Cimple.Flatten
, Language.Cimple.Graph
, Language.Cimple.Lexer
, Language.Cimple.ParseResult
, Language.Cimple.Parser
, Language.Cimple.SemCheck.Includes
, Language.Cimple.Tokens
Expand Down
37 changes: 37 additions & 0 deletions src/Language/Cimple/Ast.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ module Language.Cimple.Ast
, Node, NodeF (..)
, Scope (..)
, CommentStyle (..)
, Comment
, CommentF (..)
) where

import Data.Aeson (FromJSON, FromJSON1, ToJSON,
Expand Down Expand Up @@ -47,6 +49,7 @@ data NodeF lexeme a
| CommentSection a [a] a
| CommentSectionEnd lexeme
| Commented a a
| CommentInfo (Comment lexeme)
-- Namespace-like blocks
| ExternC [a]
-- An inferred coherent block of nodes, printed without empty lines
Expand Down Expand Up @@ -126,6 +129,40 @@ type Node lexeme = Fix (NodeF lexeme)
instance FromJSON lexeme => FromJSON1 (NodeF lexeme)
instance ToJSON lexeme => ToJSON1 (NodeF lexeme)

data CommentF lexeme a
= DocComment [a]
| DocWord lexeme
| DocSentence [a] lexeme
| DocNewline

| DocBrief [a]
| DocDeprecated [a]
| DocParam (Maybe lexeme) lexeme [a]
| DocReturn [a]
| DocRetval [lexeme] [a]
| DocSee lexeme [a]

| DocLine [a]
| DocBullet [a] [a]
| DocBulletList [a]

| DocColon lexeme
| DocRef lexeme
| DocP lexeme
| DocLParen a
| DocRParen a
| DocAssignOp AssignOp lexeme lexeme
| DocBinaryOp BinaryOp lexeme lexeme
| DocMinus lexeme a
| DocSlash lexeme a
deriving (Show, Read, Eq, Ord, Generic, Generic1, Functor, Foldable, Traversable)
deriving (Show1, Read1, Eq1, Ord1) via FunctorClassesDefault (CommentF lexeme)

type Comment lexeme = Fix (CommentF lexeme)

instance FromJSON lexeme => FromJSON1 (CommentF lexeme)
instance ToJSON lexeme => ToJSON1 (CommentF lexeme)

data AssignOp
= AopEq
| AopMul
Expand Down
199 changes: 199 additions & 0 deletions src/Language/Cimple/CommentParser.y
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
{
{-# LANGUAGE OverloadedStrings #-}
module Language.Cimple.CommentParser
( parseComment
) where

import Data.Fix (Fix (..))
import Data.Text (Text)
import qualified Data.Text as Text

import Language.Cimple.Ast (AssignOp (..), BinaryOp (..),
Comment, CommentF (..), Node,
NodeF (..))
import Language.Cimple.DescribeAst (describeLexeme, sloc)
import Language.Cimple.Lexer (Lexeme (..))
import Language.Cimple.ParseResult (ParseResult)
import Language.Cimple.Tokens (LexemeClass (..))
}

%name parseComment Comment

%error {parseError}
%errorhandlertype explist
%monad {ParseResult}
%tokentype {Term}
%token
'@brief' { L _ CmtCommand "@brief" }
'@deprecated' { L _ CmtCommand "@deprecated" }
'@implements' { L _ CmtCommand "@implements" }
'@param' { L _ CmtCommand "@param" }
'@ref' { L _ CmtCommand "@ref" }
'@p' { L _ CmtCommand "@p" }
'@return' { L _ CmtCommand "@return" }
'@retval' { L _ CmtCommand "@retval" }
'@see' { L _ CmtCommand "@see" }

' ' { L _ CmtIndent " " }
'INDENT1' { L _ CmtIndent " " }
'INDENT2' { L _ CmtIndent " " }

'(' { L _ PctLParen _ }
')' { L _ PctRParen _ }
',' { L _ PctComma _ }
':' { L _ PctColon _ }
'/' { L _ PctSlash _ }
'=' { L _ PctEq _ }
'==' { L _ PctEqEq _ }
'!=' { L _ PctEMarkEq _ }
'>=' { L _ PctGreaterEq _ }
';' { L _ PctSemicolon _ }
'.' { L _ PctPeriod _ }
'...' { L _ PctEllipsis _ }
'?' { L _ PctQMark _ }
'!' { L _ PctEMark _ }
'-' { L _ PctMinus _ }
'\n' { L _ PpNewline _ }
'/**' { L _ CmtStartDoc _ }
'*/' { L _ CmtEnd _ }
LIT_INTEGER { L _ LitInteger _ }
LIT_STRING { L _ LitString _ }
CMT_ATTR { L _ CmtAttr _ }
CMT_CODE { L _ CmtCode _ }
CMT_WORD { L _ CmtWord _ }
CMT_REF { L _ CmtRef _ }

%left '.' '?' ',' ';'
%left '-'
%left '/'
%left '(' ')'
%right NEG

%%

Comment :: { NonTerm }
Comment
: '/**' '\n' Blocks '*/' { Fix $ DocComment $3 }
| '/**' WordsWithoutNewlines '*/' { Fix $ DocComment [Fix $ DocLine $2] }
| '/**' Command(WordsWithoutNewlines) '*/' { Fix $ DocComment [$2] }
| '/**' Command(WordsWithoutNewlines) '\n' Blocks '*/' { Fix $ DocComment ($2 : $4) }

Blocks :: { [NonTerm] }
Blocks
: BlockList { reverse $1 }

BlockList :: { [NonTerm] }
BlockList
: Block { [$1] }
| BlockList Block { $2 : $1 }

Block :: { NonTerm }
Block
: '\n' { Fix DocNewline }
| ' ' Command(IndentedSentence) { $2 }
| ' ' WordsWithoutNewlines '\n' { Fix $ DocLine $2 }
| BulletListI { Fix $ DocBulletList (reverse $1) }

Command(x)
: '@brief' x { Fix $ DocBrief $2 }
| '@param' CMT_WORD x { Fix $ DocParam Nothing $2 $3 }
| '@param' CMT_ATTR CMT_WORD x { Fix $ DocParam (Just $2) $3 $4 }
| '@retval' ConstExpr x { Fix $ DocRetval $2 $3 }
| '@return' x { Fix $ DocReturn $2 }
| '@return' '\n' BulletListII { Fix $ DocReturn (Fix (DocLine []) : $3) }
| '@see' CMT_WORD x { Fix $ DocSee $2 $3 }
| '@deprecated' x { Fix $ DocDeprecated $2 }

BulletListI :: { [NonTerm] }
BulletListI
: BulletI { [$1] }
| BulletListI BulletI { $2 : $1 }

BulletI :: { NonTerm }
BulletI
: ' ' '-' WordsWithoutNewlines '\n' BulletListII { Fix $ DocBullet $3 (reverse $5) }

BulletListII :: { [NonTerm] }
BulletListII
: { [] }
| BulletListII BulletII { $2 : $1 }

BulletII :: { NonTerm }
BulletII
: 'INDENT1' '-' WordsWithoutNewlines '\n' BulletIIConts { Fix $ DocBullet ($3 ++ $5) [] }

BulletIIConts :: { [NonTerm] }
BulletIIConts
: { [] }
| BulletIIConts BulletIICont { $1 ++ $2 }

BulletIICont :: { [NonTerm] }
BulletIICont
: 'INDENT2' WordsWithoutNewlines '\n' { $2 }

IndentedSentence :: { [NonTerm] }
IndentedSentence
: WordsWithoutNewlines '\n' { [Fix $ DocLine $1] }
| WordsWithoutNewlines '\n' 'INDENT1' IndentedSentence { Fix (DocLine $1) : $4 }

WordsWithoutNewlines :: { [NonTerm] }
WordsWithoutNewlines
: SentenceList(WordList) { $1 }

WordList :: { [NonTerm] }
WordList
: Word { [$1] }
| WordList Word { $2 : $1 }

SentenceList(x)
: x { reverse $1 }
| SentenceList(x) ';' { [Fix (DocSentence $1 $2)] }
| SentenceList(x) ',' { [Fix (DocSentence $1 $2)] }
| SentenceList(x) '.' { [Fix (DocSentence $1 $2)] }
| SentenceList(x) '?' { [Fix (DocSentence $1 $2)] }
| SentenceList(x) ';' SentenceList(x) { Fix (DocSentence $1 $2) : $3 }
| SentenceList(x) ',' SentenceList(x) { Fix (DocSentence $1 $2) : $3 }
| SentenceList(x) '.' SentenceList(x) { Fix (DocSentence $1 $2) : $3 }
| SentenceList(x) '?' SentenceList(x) { Fix (DocSentence $1 $2) : $3 }

Word :: { NonTerm }
Word
: Atom { Fix $ DocWord $1 }
| Atom ':' { Fix $ DocColon $1 }
| Atom '-' Word { Fix $ DocMinus $1 $3 }
| Atom '/' Word { Fix $ DocSlash $1 $3 }
| Atom '=' Atom { Fix $ DocAssignOp AopEq $1 $3 }
| Atom '!=' Atom { Fix $ DocBinaryOp BopNe $1 $3 }
| Atom '>=' Atom { Fix $ DocBinaryOp BopGe $1 $3 }
| Atom '==' Atom { Fix $ DocBinaryOp BopEq $1 $3 }
| '@ref' Atom { Fix $ DocRef $2 }
| '@p' Atom { Fix $ DocP $2 }
| '(' Word { Fix $ DocLParen $2 }
| Word ')' { Fix $ DocRParen $1 }

Atom :: { Term }
Atom
: CMT_WORD { $1 }
| CMT_CODE { $1 }
| LIT_INTEGER { $1 }
| LIT_STRING { $1 }
| '...' { $1 }

ConstExpr :: { [Term] }
ConstExpr
: Atom { [$1] }
| '-' Atom { [$1, $2] }

{
type Term = Lexeme Text
type NonTerm = Comment Term

failAt :: Lexeme Text -> String -> ParseResult a
failAt n msg =
fail $ Text.unpack (sloc "" n) <> ": unexpected " <> describeLexeme n <> msg

parseError :: ([Lexeme Text], [String]) -> ParseResult a
parseError ([], options) = fail $ " end of comment; expected one of " <> show options
parseError (n:_, []) = failAt n "; expected end of comment"
parseError (n:_, options) = failAt n $ "; expected one of " <> show options
}
4 changes: 4 additions & 0 deletions src/Language/Cimple/DescribeAst.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
{-# LANGUAGE StrictData #-}
module Language.Cimple.DescribeAst
( HasLocation (..)
, describeLexeme
, describeNode
) where

Expand Down Expand Up @@ -37,3 +38,6 @@ describeNode node = case unFix node of
where
ellipsis :: String
ellipsis = "..."

describeLexeme :: Show a => Lexeme a -> String
describeLexeme = show
10 changes: 8 additions & 2 deletions src/Language/Cimple/Flatten.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
{-# LANGUAGE TypeOperators #-}
module Language.Cimple.Flatten (lexemes) where

import Data.Fix (Fix (..))
import Data.Maybe (maybeToList)
import GHC.Generics
import Language.Cimple.Ast (AssignOp, BinaryOp, CommentStyle,
LiteralType, NodeF (..), Scope, UnaryOp)
import Language.Cimple.Ast (AssignOp, BinaryOp, CommentF,
CommentStyle, LiteralType, NodeF (..),
Scope, UnaryOp)

class Concats t a where
concats :: t -> [a]
Expand Down Expand Up @@ -53,11 +55,15 @@ instance {-# OVERLAPPING #-} GenConcatsFlatten a a where
instance {-# OVERLAPPABLE #-} GenConcatsFlatten b a => GenConcatsFlatten [b] a where
gconcatsFlatten = concatMap gconcatsFlatten

instance GenConcatsFlatten (Fix (CommentF a)) a where
gconcatsFlatten = error "TODO: gconcatsFlatten for CommentF"

instance GenConcatsFlatten t a => GenConcats (Rec0 t) a where
gconcats (K1 x) = gconcatsFlatten x

-- Uses the default signature, which delegates to the generic stuff
instance Concats (NodeF a [a]) a
instance Concats (CommentF a [a]) a

lexemes :: NodeF lexeme [lexeme] -> [lexeme]
lexemes = concats
3 changes: 2 additions & 1 deletion src/Language/Cimple/IO.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import Language.Cimple.Lexer (Lexeme, runAlex)
import Language.Cimple.MapAst (TextActions, mapAst,
textActions)
import qualified Language.Cimple.Parser as Parser
import qualified Language.Cimple.ParseResult as ParseResult
import Language.Cimple.Program (Program)
import qualified Language.Cimple.Program as Program
import Language.Cimple.TranslationUnit (TranslationUnit)
Expand Down Expand Up @@ -51,7 +52,7 @@ parseText contents =

parseTextPedantic :: Text -> Either String [TextNode]
parseTextPedantic =
parseText >=> TreeParser.toEither . TreeParser.parseTranslationUnit
parseText >=> ParseResult.toEither . TreeParser.parseTranslationUnit


parseFile :: FilePath -> IO (Either String (TranslationUnit Text))
Expand Down
Loading