Merge pull request #4 from ACRIOS-Systems/feature/automated_documentation_fixed_multicomments

Feature/automated documentation fixed multicomments
ACRIOS-Systems · Apr 4, 2022 · 7670fd6 · 7670fd6
2 parents b7788a5 + 8863e56
commit 7670fd6
Show file tree

Hide file tree

Showing 2 changed files with 154 additions and 57 deletions.
diff --git a/dissect/cstruct/cstruct.py b/dissect/cstruct/cstruct.py
@@ -70,7 +70,7 @@ class {name}(Structure):
     def __init__(self, cstruct, structure, source=None):
         self.structure = structure
         self.source = source
-        super({name}, self).__init__(cstruct, structure.name, structure.fields)
+        super({name}, self).__init__(cstruct, structure.name, structure.fields, structure.commentAttributes)
 
     def _read(self, stream):
         r = OrderedDict()
@@ -88,6 +88,14 @@ def __repr__(self):
         return '<Structure {name} +compiled>'
 """
 
+COMMENT_MULTILINE = r'/\*[^*]*\*+(?:[^/*][^*]*\*+)*/'
+COMMENT_INLINE = r'//[^\n]*'
+COMMENT_MULTILINE_REPEATED = r'(^[ ]*('+COMMENT_INLINE+r'|'+COMMENT_MULTILINE+r'([ ]*('+COMMENT_INLINE+r'|'+COMMENT_MULTILINE+r'))*)[ \t\r]*\n?)*^[ ]*('+COMMENT_INLINE+r'|(?P<commentBlock>'+COMMENT_MULTILINE+r'))+'
+COMMENT_REGEX_START=r'('+COMMENT_MULTILINE_REPEATED+r')?[ \t\r\n]*'
+COMMENT_REGEX_END =r'(?P<commentBlockAfter>(([ ]*'+COMMENT_MULTILINE+r')+)?[ ]*('+COMMENT_INLINE+r')?)?[ \t\r]*'
+
+#print(f"COMMENT_REGEX_START:{COMMENT_REGEX_START}")
+#print(f"COMMENT_REGEX_END:{COMMENT_REGEX_END}")
 
 class Error(Exception):
     pass
@@ -333,44 +341,58 @@ def _constants(self, data):
 
     def _enums(self, data):
         r = re.finditer(
-            r'enum\s+(?P<name>[^\s:{]+)\s*(:\s*(?P<type>[^\s]+)\s*)?\{(?P<values>[^}]+)\}\s*;',
-            data,
+            COMMENT_REGEX_START+r'enum\s+(?P<name>[^\s:{]+)\s*(:\s*(?P<type>[^\s]+)\s*)?\{(?P<values>[^}]+)\}\s*;'+COMMENT_REGEX_END,
+            data, re.MULTILINE
         )
         for t in r:
             d = t.groupdict()
 
-            nextval = 0
-            values = {}
-            for line in d['values'].split('\n'):
-                line, sep, comment = line.partition("//")
-                for v in line.split(","):
-                    key, sep, val = v.partition("=")
-                    key = key.strip()
-                    val = val.strip()
-                    if not key:
-                        continue
-                    if not val:
-                        val = nextval
-                    else:
-                        val = Expression(self.cstruct, val).evaluate({})
-
-                    nextval = val + 1
-
-                    values[key] = val
-
             if not d['type']:
                 d['type'] = 'uint32'
 
+            values, valuesDetails = self._parse_fields_enums(d['values'])
+
+            commentAttributes = self.parse_comment_block(d['commentBlock'])
+
             enum = Enum(
-                self.cstruct, d['name'], self.cstruct.resolve(d['type']), values
+                self.cstruct, d['name'], self.cstruct.resolve(d['type']), values, valuesDetails, commentAttributes
             )
             self.cstruct.addtype(enum.name, enum)
 
+    def _parse_fields_enums(self, s):
+        nextval = 0
+        values = {}
+        valuesDetails = {}
+        fields = re.finditer(
+            COMMENT_REGEX_START+r'(?P<value>[a-zA-z][^\n,/]*),?'+COMMENT_REGEX_END,
+            s, re.MULTILINE
+        )
+
+        for f in fields:
+            d = f.groupdict()
+
+            # Ignore for now
+            commentAttributes = self.parse_comment_block(d['commentBlock'])
+
+            field = re.finditer(
+                r'(?P<key>[a-zA-z][^ =]*)[ ]*=?[ ]*(?P<value>[^ ]+)?',
+                d["value"],
+            )
+
+            f = list(field)[0].groupdict()
+
+            values[f["key"]] = Expression(self.cstruct, f["value"]).evaluate({}) if f["value"] != None else nextval
+
+            nextval = values[f["key"]] + 1
+            valuesDetails[f["key"]] = {"value":values[f["key"]], "commentAttributes":commentAttributes}
+
+        return values, valuesDetails
+
     def _structs(self, data):
         compiler = Compiler(self.cstruct)
         r = re.finditer(
-            r'(#(?P<flags>(?:compile))\s+)?((?P<typedef>typedef)\s+)?(?P<type>[^\s]+)\s+(__attribute__\(\([^)]+\)\)\s*)?(?P<name>[^\s]+)?(?P<fields>\s*\{(\s*//[^\n]*|/\*[^*]*\*/|[^}])+\}(?P<defs>\s+[^;\n]+)?)?\s*;',
-            data,
+            COMMENT_REGEX_START+r'(#(?P<flags>(?:compile))\s+)?((?P<typedef>typedef)\s+)?(?P<type>[^\s]+)\s+(__attribute__\(\([^)]+\)\)\s*)?(?P<name>[^\s]+)?(?P<fields>\s*\{(\s*//[^\n]*|/\*[^*]*\*/|[^}])+\}(?P<defs>\s+[^;\n]+)?)?\s*;'+COMMENT_REGEX_END,
+            data, re.MULTILINE
         )
         for t in r:
             d = t.groupdict()
@@ -383,8 +405,9 @@ def _structs(self, data):
                 raise ParserError("No name for struct")
 
             if d['type'] == 'struct':
-                data = self._parse_fields(d['fields'][1:-1].strip())
-                st = Structure(self.cstruct, name, data)
+                data = self._parse_fields_struct(d['fields'][1:-1].strip())
+                commentAttributes = self.parse_comment_block(d['commentBlock'])
+                st = Structure(self.cstruct, name, data, commentAttributes)
                 if d['flags'] == 'compile' or self.compiled:
                     st = compiler.compile(st)
             elif d['typedef'] == 'typedef':
@@ -400,32 +423,18 @@ def _structs(self, data):
                     td = td.strip()
                     self.cstruct.addtype(td, st)
 
-    def _parse_fields(self, s):
-        commentAttributes = {}
+    def _parse_fields_struct(self, s):
         fields = re.finditer(
-            r'(?P<commentBlock>\/\*(\*(?!\/)|[^*])*\*\/)?[ \t\r\n]*(?P<type>[^\s]+)\s+(?P<name>[^\s\[:]+)(\s*:\s*(?P<bits>\d+))?(\[(?P<count>[^;\n]*)\])?;',
-            s,
+            COMMENT_REGEX_START+r'(?P<type>[^\s]+)\s+(?P<name>[^\s\[:]+)(\s*:\s*(?P<bits>\d+))?(\[(?P<count>[^;\n]*)\])?;'+COMMENT_REGEX_END,
+            s, re.MULTILINE
         )
         r = []
         for f in fields:
             d = f.groupdict()
             if d['type'].startswith('//'):
                 continue
 
-            commentAttributes={}
-
-            #parse the comment header
-            if d['commentBlock'] is not None and d['commentBlock'].startswith('/*'):
-                commentfields = re.finditer(
-                    r'@(?P<commentType>[^@,;:\\]+):[ \t]*(?P<commentVal>[^@,;:\s\\]+)',
-                    d['commentBlock'],
-                )
-                for cf in commentfields:
-                    cd=cf.groupdict()
-                    try:
-                        commentAttributes[cd['commentType']]=cd['commentVal']
-                    except Exception:
-                        pass
+            commentAttributes = self.parse_comment_block(d['commentBlock'])
 
             type_ = self.cstruct.resolve(d['type'])
 
@@ -449,11 +458,32 @@ def _parse_fields(self, s):
                 d['name'] = d['name'][1:]
                 type_ = Pointer(self.cstruct, type_)
 
-            field = Field(d['name'], type_, int(d['bits']) if d['bits'] else None, commentAttributes=commentAttributes)
+            field = StructureField(d['name'], type_, int(d['bits']) if d['bits'] else None, commentAttributes=commentAttributes)
             r.append(field)
 
         return r
 
+    def parse_comment_block(self,s):
+        commentAttributes={}
+
+        #parse the comment header
+        if s is not None and s.startswith('/*'):
+            commentfields = re.finditer(
+                r'@(?P<commentType>[^@,;:\\]+):[ \t]*(?P<commentVal>[^@\n]+)',
+                s,
+            )
+            for cf in commentfields:
+                cd=cf.groupdict()
+                try:
+                    oldData = commentAttributes.get(cd['commentType'],"")
+                    if "" != oldData:
+                        oldData += " "
+                    commentAttributes[cd['commentType']]=oldData + cd['commentVal']
+                except Exception:
+                    pass
+
+        return commentAttributes
+
     def _lookups(self, data, consts):
         r = re.finditer(r'\$(?P<name>[^\s]+) = ({[^}]+})\w*\n', data)
 
@@ -757,11 +787,12 @@ def __repr__(self):
 class Structure(BaseType):
     """Type class for structures."""
 
-    def __init__(self, cstruct, name, fields=None):
+    def __init__(self, cstruct, name, fields=None, commentAttributes={}):
         self.name = name
         self.size = None
         self.lookup = OrderedDict()
         self.fields = fields if fields else []
+        self.commentAttributes = commentAttributes
 
 
         for f in self.fields:
@@ -885,7 +916,7 @@ def add_fields(self, name, type_, offset=None, commentAttributes={}):
             type_: The field type.
             offset: The field offset.
         """
-        field = Field(name, type_, offset=offset, commentAttributes=commentAttributes)
+        field = StructureField(name, type_, offset=offset, commentAttributes=commentAttributes)
         self.fields.append(field)
         self.lookup[name] = field
         self.size = None
@@ -915,7 +946,7 @@ def __repr__(self):
     def show(self, indent=0):
         """Pretty print this structure."""
         if indent == 0:
-            print("struct {}".format(self.name))
+            print("{} struct {}".format(self.commentAttributes, self.name))
 
         for field in self.fields:
             if field.offset is None:
@@ -983,7 +1014,7 @@ def reset(self):
         self._remaining = 0
 
 
-class Field(object):
+class StructureField(object):
     """Holds a structure field."""
 
     def __init__(self, name, type_, bits=None, offset=None, commentAttributes={}):
@@ -1312,9 +1343,11 @@ class Enum(RawType):
             };
     """
 
-    def __init__(self, cstruct, name, type_, values):
+    def __init__(self, cstruct, name, type_, values, valuesDetails, commentAttributes={}):
         self.type = type_
         self.values = values
+        self.valuesDetails = valuesDetails
+        self.commentAttributes = commentAttributes
         self.reverse = {}
 
         for k, v in values.items():
@@ -1365,6 +1398,9 @@ def __getattr__(self, attr):
     def __contains__(self, attr):
         return attr in self.values
 
+    def __repr__(self):
+        return '<Enum {}>'.format(self.name)
+
 
 class EnumInstance(object):
     """Implements a value instance of an Enum"""

diff --git a/tests/test_basic.py b/tests/test_basic.py
@@ -257,6 +257,15 @@ def test_enum_comments():
     assert c.Inline.foo == 9
     assert c.Inline.bar == 10
 
+    assert c.Inline.valuesDetails["hello"]["value"] == 7
+    assert c.Inline.valuesDetails["hello"]["commentAttributes"] == {}
+    assert c.Inline.valuesDetails["world"]["value"] == 8
+    assert c.Inline.valuesDetails["world"]["commentAttributes"] == {}
+    assert c.Inline.valuesDetails["foo"]["value"] == 9
+    assert c.Inline.valuesDetails["foo"]["commentAttributes"] == {}
+    assert c.Inline.valuesDetails["bar"]["value"] == 10
+    assert c.Inline.valuesDetails["bar"]["commentAttributes"] == {}
+
     assert c.Test.a == 2
     assert c.Test.b == 3
     assert c.Test.c == 4
@@ -772,16 +781,24 @@ def test_dumpstruct(capsys):
     assert captured_1.out == captured_2.out
 
 
-def test_commentfieldparse(capsys):
+def test_comment_field_parse_struct(capsys):
     c = cstruct.cstruct()
     c.load("""
-    /*discardedCom1*/
+    /*
+     * @comment: Hello,
+     * @comment: how are you?
+     */
     struct test{
+        // int notAnStruct;
         /*
-        * @scale: 0.001
-        * @unit: µtestUnit1
-        */
+         * @comment: I am fine.
+         * @comment: Thank you.
+         * @scale: 0.001
+         * @unit: µtestUnit1
+         */
         int testVar1;
+        // int testVar1;
+        // int notAnStruct;
         int testVar2;
         /* dicardedCom2
         * @scale: 5
@@ -792,15 +809,59 @@ def test_commentfieldparse(capsys):
     """, compiled=False)
 
     assert c.test.name == 'test'
+    assert c.test.commentAttributes['comment'] == 'Hello, how are you?'
 
     assert 'testVar1' in c.test.lookup
     assert 'testVar2' in c.test.lookup
     assert 'testVar2' in c.test.lookup
 
+    assert c.test.lookup['testVar1'].commentAttributes['comment'] == 'I am fine. Thank you.'
     assert c.test.lookup['testVar1'].commentAttributes['scale'] == '0.001'
     assert c.test.lookup['testVar1'].commentAttributes['unit'] == 'µtestUnit1'
 
     assert c.test.lookup['testVar2'].commentAttributes == {}
 
     assert c.test.lookup['testVar3'].commentAttributes['scale'] == '5'
-    assert c.test.lookup['testVar3'].commentAttributes['unit'] == '%testUnit2'
+    assert c.test.lookup['testVar3'].commentAttributes['unit'] == '%testUnit2'
+
+    assert "notAnStruct" not in c.test.lookup
+
+def test_comment_field_parse_enum(capsys):
+    c = cstruct.cstruct()
+    c.load("""
+    /*discardedCom1*/
+    enum test{
+        // notAnEnum=4,
+        /*
+        * @comment: Comments are working
+        */
+        testEnumVar1=5,
+        // testEnumVar1,
+        testEnumVar2,
+        // notAnEnum=14,
+        /*
+        * @comment: Comments are working 2
+        */
+        testEnumVar3,
+    };
+    """, compiled=False)
+
+    assert c.test.name == 'test'
+    assert 'comment' not in c.test.commentAttributes
+
+    assert 'testEnumVar1' in c.test.values
+    assert 'testEnumVar1' in c.test.valuesDetails
+    assert 'testEnumVar2' in c.test.values
+    assert 'testEnumVar2' in c.test.valuesDetails
+    assert 'testEnumVar3' in c.test.values
+    assert 'testEnumVar3' in c.test.valuesDetails
+
+
+    assert c.test.valuesDetails['testEnumVar1']["commentAttributes"]['comment'] == 'Comments are working'
+
+    assert c.test.valuesDetails['testEnumVar2']["commentAttributes"] == {}
+
+    assert c.test.valuesDetails['testEnumVar3']["commentAttributes"]['comment'] == 'Comments are working 2'
+
+    assert "notAnEnum" not in c.test.values
+    assert "notAnEnum" not in c.test.valuesDetails