Skip to content

Commit

Permalink
feat: Add doxygen comment parser.
Browse files Browse the repository at this point in the history
This one is very strict and most comments in toxcore don't comply and
need to be fixed.
  • Loading branch information
iphydf committed Mar 5, 2022
1 parent e404717 commit a55da30
Show file tree
Hide file tree
Showing 15 changed files with 607 additions and 71 deletions.
26 changes: 25 additions & 1 deletion BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ haskell_library(
name = "lexer",
srcs = [
"src/Language/Cimple/Lexer.hs",
"src/Language/Cimple/ParseResult.hs",
"src/Language/Cimple/Tokens.hs",
],
src_strip_prefix = "src",
Expand All @@ -33,7 +34,6 @@ haskell_library(
src_strip_prefix = "src",
visibility = ["//hs-cimple:__subpackages__"],
deps = [
":lexer",
hazel_library("aeson"),
hazel_library("base"),
hazel_library("data-fix"),
Expand Down Expand Up @@ -81,6 +81,28 @@ haskell_library(
],
)

happy_parser(
name = "CommentParser",
src = "src/Language/Cimple/CommentParser.y",
preproc = "expand_yacc.pl",
)

haskell_library(
name = "comment-parser",
srcs = [":CommentParser"],
src_strip_prefix = "src",
visibility = ["//hs-cimple:__subpackages__"],
deps = [
":ast",
":describe-ast",
":lexer",
hazel_library("array"),
hazel_library("base"),
hazel_library("data-fix"),
hazel_library("text"),
],
)

happy_parser(
name = "TreeParser",
src = "src/Language/Cimple/TreeParser.y",
Expand All @@ -94,6 +116,7 @@ haskell_library(
visibility = ["//hs-cimple:__subpackages__"],
deps = [
":ast",
":comment-parser",
":describe-ast",
":lexer",
hazel_library("array"),
Expand All @@ -111,6 +134,7 @@ haskell_library(
"src/Language/Cimple/Ast.hs",
"src/Language/Cimple/DescribeAst.hs",
"src/Language/Cimple/Flatten.hs",
"src/Language/Cimple/ParseResult.hs",
"src/Language/Cimple/Tokens.hs",
],
),
Expand Down
2 changes: 2 additions & 0 deletions cimple.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ library
other-modules:
Language.Cimple.Annot
, Language.Cimple.Ast
, Language.Cimple.CommentParser
, Language.Cimple.DescribeAst
, Language.Cimple.Flatten
, Language.Cimple.Graph
, Language.Cimple.Lexer
, Language.Cimple.ParseResult
, Language.Cimple.Parser
, Language.Cimple.SemCheck.Includes
, Language.Cimple.Tokens
Expand Down
37 changes: 37 additions & 0 deletions src/Language/Cimple/Ast.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ module Language.Cimple.Ast
, Node, NodeF (..)
, Scope (..)
, CommentStyle (..)
, Comment
, CommentF (..)
) where

import Data.Aeson (FromJSON, FromJSON1, ToJSON,
Expand Down Expand Up @@ -47,6 +49,7 @@ data NodeF lexeme a
| CommentSection a [a] a
| CommentSectionEnd lexeme
| Commented a a
| CommentInfo (Comment lexeme)
-- Namespace-like blocks
| ExternC [a]
-- An inferred coherent block of nodes, printed without empty lines
Expand Down Expand Up @@ -126,6 +129,40 @@ type Node lexeme = Fix (NodeF lexeme)
instance FromJSON lexeme => FromJSON1 (NodeF lexeme)
instance ToJSON lexeme => ToJSON1 (NodeF lexeme)

data CommentF lexeme a
= DocComment [a]
| DocWord lexeme
| DocSentence [a] lexeme
| DocNewline

| DocBrief [a]
| DocDeprecated [a]
| DocParam (Maybe lexeme) lexeme [a]
| DocReturn [a]
| DocRetval [lexeme] [a]
| DocSee lexeme [a]

| DocLine [a]
| DocBullet [a] [a]
| DocBulletList [a]

| DocColon lexeme
| DocRef lexeme
| DocP lexeme
| DocLParen a
| DocRParen a
| DocAssignOp AssignOp lexeme lexeme
| DocBinaryOp BinaryOp lexeme lexeme
| DocMinus lexeme a
| DocSlash lexeme a
deriving (Show, Read, Eq, Ord, Generic, Generic1, Functor, Foldable, Traversable)
deriving (Show1, Read1, Eq1, Ord1) via FunctorClassesDefault (CommentF lexeme)

type Comment lexeme = Fix (CommentF lexeme)

instance FromJSON lexeme => FromJSON1 (CommentF lexeme)
instance ToJSON lexeme => ToJSON1 (CommentF lexeme)

data AssignOp
= AopEq
| AopMul
Expand Down
201 changes: 201 additions & 0 deletions src/Language/Cimple/CommentParser.y
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
{
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.Cimple.CommentParser
( parseComment
) where

import Data.Fix (Fix (..))
import Data.Text (Text)
import qualified Data.Text as Text

import Language.Cimple.Ast (AssignOp (..), BinaryOp (..),
Comment, CommentF (..), Node,
NodeF (..))
import Language.Cimple.DescribeAst (describeLexeme, sloc)
import Language.Cimple.Lexer (Lexeme (..))
import Language.Cimple.ParseResult (ParseResult)
import Language.Cimple.Tokens (LexemeClass (..))
}

%name parseComment Comment

%error {parseError}
%errorhandlertype explist
%monad {ParseResult}
%tokentype {Term}
%token
'@brief' { L _ CmtCommand "@brief" }
'@deprecated' { L _ CmtCommand "@deprecated" }
'@implements' { L _ CmtCommand "@implements" }
'@param' { L _ CmtCommand "@param" }
'@ref' { L _ CmtCommand "@ref" }
'@p' { L _ CmtCommand "@p" }
'@return' { L _ CmtCommand "@return" }
'@retval' { L _ CmtCommand "@retval" }
'@see' { L _ CmtCommand "@see" }

' ' { L _ CmtIndent " " }
'INDENT1' { L _ CmtIndent " " }
'INDENT2' { L _ CmtIndent " " }

'(' { L _ PctLParen _ }
')' { L _ PctRParen _ }
',' { L _ PctComma _ }
':' { L _ PctColon _ }
'/' { L _ PctSlash _ }
'=' { L _ PctEq _ }
'==' { L _ PctEqEq _ }
'!=' { L _ PctEMarkEq _ }
'>=' { L _ PctGreaterEq _ }
';' { L _ PctSemicolon _ }
'.' { L _ PctPeriod _ }
'...' { L _ PctEllipsis _ }
'?' { L _ PctQMark _ }
'!' { L _ PctEMark _ }
'-' { L _ PctMinus _ }
'\n' { L _ PpNewline _ }
'/**' { L _ CmtStartDoc _ }
'*/' { L _ CmtEnd _ }
LIT_INTEGER { L _ LitInteger _ }
LIT_STRING { L _ LitString _ }
CMT_ATTR { L _ CmtAttr _ }
CMT_CODE { L _ CmtCode _ }
CMT_WORD { L _ CmtWord _ }
CMT_REF { L _ CmtRef _ }

%left '.' '?' ',' ';'
%left '-'
%left '/'
%left '(' ')'
%right NEG

%%

Comment :: { NonTerm }
Comment
: '/**' '\n' Blocks '*/' { Fix $ DocComment $3 }
| '/**' WordsWithoutNewlines '*/' { Fix $ DocComment [Fix $ DocLine $2] }
| '/**' Command(WordsWithoutNewlines) '*/' { Fix $ DocComment [$2] }
| '/**' Command(WordsWithoutNewlines) '\n' Blocks '*/' { Fix $ DocComment ($2 : $4) }

Blocks :: { [NonTerm] }
Blocks
: BlockList { reverse $1 }

BlockList :: { [NonTerm] }
BlockList
: Block { [$1] }
| BlockList Block { $2 : $1 }

Block :: { NonTerm }
Block
: '\n' { Fix DocNewline }
| ' ' Command(IndentedSentence) { $2 }
| ' ' WordsWithoutNewlines '\n' { Fix $ DocLine $2 }
| BulletListI { Fix $ DocBulletList (reverse $1) }

Command(x)
: '@brief' x { Fix $ DocBrief $2 }
| '@param' CMT_WORD x { Fix $ DocParam Nothing $2 $3 }
| '@param' CMT_ATTR CMT_WORD x { Fix $ DocParam (Just $2) $3 $4 }
| '@retval' ConstExpr x { Fix $ DocRetval $2 $3 }
| '@return' x { Fix $ DocReturn $2 }
| '@return' '\n' BulletListII { Fix $ DocReturn (Fix (DocLine []) : $3) }
| '@see' CMT_WORD x { Fix $ DocSee $2 $3 }
| '@deprecated' x { Fix $ DocDeprecated $2 }

BulletListI :: { [NonTerm] }
BulletListI
: BulletI { [$1] }
| BulletListI BulletI { $2 : $1 }

BulletI :: { NonTerm }
BulletI
: ' ' '-' WordsWithoutNewlines '\n' BulletListII { Fix $ DocBullet $3 (reverse $5) }

BulletListII :: { [NonTerm] }
BulletListII
: { [] }
| BulletListII BulletII { $2 : $1 }

BulletII :: { NonTerm }
BulletII
: 'INDENT1' '-' WordsWithoutNewlines '\n' BulletIIConts { Fix $ DocBullet ($3 ++ $5) [] }

BulletIIConts :: { [NonTerm] }
BulletIIConts
: { [] }
| BulletIIConts BulletIICont { $1 ++ $2 }

BulletIICont :: { [NonTerm] }
BulletIICont
: 'INDENT2' WordsWithoutNewlines '\n' { $2 }

IndentedSentence :: { [NonTerm] }
IndentedSentence
: WordsWithoutNewlines '\n' { [Fix $ DocLine $1] }
| WordsWithoutNewlines '\n' 'INDENT1' IndentedSentence { Fix (DocLine $1) : $4 }

WordsWithoutNewlines :: { [NonTerm] }
WordsWithoutNewlines
: SentenceList(WordList) { $1 }

WordList :: { [NonTerm] }
WordList
: Word { [$1] }
| WordList Word { $2 : $1 }

SentenceList(x)
: x { reverse $1 }
| SentenceList(x) ';' { [Fix (DocSentence $1 $2)] }
| SentenceList(x) ',' { [Fix (DocSentence $1 $2)] }
| SentenceList(x) '.' { [Fix (DocSentence $1 $2)] }
| SentenceList(x) '?' { [Fix (DocSentence $1 $2)] }
| SentenceList(x) ';' SentenceList(x) { Fix (DocSentence $1 $2) : $3 }
| SentenceList(x) ',' SentenceList(x) { Fix (DocSentence $1 $2) : $3 }
| SentenceList(x) '.' SentenceList(x) { Fix (DocSentence $1 $2) : $3 }
| SentenceList(x) '?' SentenceList(x) { Fix (DocSentence $1 $2) : $3 }

Word :: { NonTerm }
Word
: Atom { Fix $ DocWord $1 }
| Atom ':' { Fix $ DocColon $1 }
| Atom '-' Word { Fix $ DocMinus $1 $3 }
| Atom '/' Word { Fix $ DocSlash $1 $3 }
| Atom '=' Atom { Fix $ DocAssignOp AopEq $1 $3 }
| Atom '!=' Atom { Fix $ DocBinaryOp BopNe $1 $3 }
| Atom '>=' Atom { Fix $ DocBinaryOp BopGe $1 $3 }
| Atom '==' Atom { Fix $ DocBinaryOp BopEq $1 $3 }
| '@ref' Atom { Fix $ DocRef $2 }
| '@p' Atom { Fix $ DocP $2 }
| '(' Word { Fix $ DocLParen $2 }
| Word ')' { Fix $ DocRParen $1 }

Atom :: { Term }
Atom
: CMT_WORD { $1 }
| CMT_CODE { $1 }
| LIT_INTEGER { $1 }
| LIT_STRING { $1 }
| '...' { $1 }

ConstExpr :: { [Term] }
ConstExpr
: Atom { [$1] }
| '-' Atom { [$1, $2] }

{
type Term = Lexeme Text
type NonTerm = Comment Term

failAt :: Lexeme Text -> String -> ParseResult a
failAt n msg =
fail $ Text.unpack (sloc "" n) <> ": unexpected " <> describeLexeme n <> msg

parseError :: ([Lexeme Text], [String]) -> ParseResult a
parseError ([], options) = fail $ " end of comment; expected one of " <> show options
parseError (n:_, []) = failAt n "; expected end of comment"
parseError (n:_, options) = failAt n $ "; expected one of " <> show options
}
4 changes: 4 additions & 0 deletions src/Language/Cimple/DescribeAst.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
{-# LANGUAGE StrictData #-}
module Language.Cimple.DescribeAst
( HasLocation (..)
, describeLexeme
, describeNode
) where

Expand Down Expand Up @@ -37,3 +38,6 @@ describeNode node = case unFix node of
where
ellipsis :: String
ellipsis = "..."

describeLexeme :: Show a => Lexeme a -> String
describeLexeme = show
10 changes: 8 additions & 2 deletions src/Language/Cimple/Flatten.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
{-# LANGUAGE TypeOperators #-}
module Language.Cimple.Flatten (lexemes) where

import Data.Fix (Fix (..))
import Data.Maybe (maybeToList)
import GHC.Generics
import Language.Cimple.Ast (AssignOp, BinaryOp, CommentStyle,
LiteralType, NodeF (..), Scope, UnaryOp)
import Language.Cimple.Ast (AssignOp, BinaryOp, CommentF,
CommentStyle, LiteralType, NodeF (..),
Scope, UnaryOp)

class Concats t a where
concats :: t -> [a]
Expand Down Expand Up @@ -53,11 +55,15 @@ instance {-# OVERLAPPING #-} GenConcatsFlatten a a where
instance {-# OVERLAPPABLE #-} GenConcatsFlatten b a => GenConcatsFlatten [b] a where
gconcatsFlatten = concatMap gconcatsFlatten

instance GenConcatsFlatten (Fix (CommentF a)) a where
gconcatsFlatten = error "TODO: gconcatsFlatten for CommentF"

instance GenConcatsFlatten t a => GenConcats (Rec0 t) a where
gconcats (K1 x) = gconcatsFlatten x

-- Uses the default signature, which delegates to the generic stuff
instance Concats (NodeF a [a]) a
instance Concats (CommentF a [a]) a

lexemes :: NodeF lexeme [lexeme] -> [lexeme]
lexemes = concats
3 changes: 2 additions & 1 deletion src/Language/Cimple/IO.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import Language.Cimple.Lexer (Lexeme, runAlex)
import Language.Cimple.MapAst (TextActions, mapAst,
textActions)
import qualified Language.Cimple.Parser as Parser
import qualified Language.Cimple.ParseResult as ParseResult
import Language.Cimple.Program (Program)
import qualified Language.Cimple.Program as Program
import Language.Cimple.TranslationUnit (TranslationUnit)
Expand Down Expand Up @@ -51,7 +52,7 @@ parseText contents =

parseTextPedantic :: Text -> Either String [TextNode]
parseTextPedantic =
parseText >=> TreeParser.toEither . TreeParser.parseTranslationUnit
parseText >=> ParseResult.toEither . TreeParser.parseTranslationUnit


parseFile :: FilePath -> IO (Either String (TranslationUnit Text))
Expand Down
Loading

0 comments on commit a55da30

Please sign in to comment.