From fdf41d3230ff8680c08640134cda885416915309 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 21 Feb 2023 11:21:27 +0100 Subject: [PATCH 01/43] [issue-382] add lexer Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/lexer/__init__.py | 0 src/spdx/parser/tagvalue/lexer/tagvalue.py | 242 +++++++++++++++ tests/spdx/parser/tagvalue/__init__.py | 0 .../parser/tagvalue/test_tag_value_lexer.py | 288 ++++++++++++++++++ 4 files changed, 530 insertions(+) create mode 100644 src/spdx/parser/tagvalue/lexer/__init__.py create mode 100644 src/spdx/parser/tagvalue/lexer/tagvalue.py create mode 100644 tests/spdx/parser/tagvalue/__init__.py create mode 100644 tests/spdx/parser/tagvalue/test_tag_value_lexer.py diff --git a/src/spdx/parser/tagvalue/lexer/__init__.py b/src/spdx/parser/tagvalue/lexer/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/spdx/parser/tagvalue/lexer/tagvalue.py b/src/spdx/parser/tagvalue/lexer/tagvalue.py new file mode 100644 index 000000000..e6737db47 --- /dev/null +++ b/src/spdx/parser/tagvalue/lexer/tagvalue.py @@ -0,0 +1,242 @@ +# Copyright (c) 2014 Ahmed H. Ismail +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ply import lex +from ply.lex import TOKEN + + +class SPDXLexer(object): + reserved = { + # Top level fields + "SPDXVersion": "DOC_VERSION", + "DataLicense": "DOC_LICENSE", + "DocumentName": "DOC_NAME", + "SPDXID": "SPDX_ID", + "DocumentComment": "DOC_COMMENT", + "DocumentNamespace": "DOC_NAMESPACE", + "ExternalDocumentRef": "EXT_DOC_REF", + # Creation info fields + "Creator": "CREATOR", + "Created": "CREATED", + "CreatorComment": "CREATOR_COMMENT", + "LicenseListVersion": "LIC_LIST_VER", + # Annotation fields + "Annotator": "ANNOTATOR", + "AnnotationDate": "ANNOTATION_DATE", + "AnnotationComment": "ANNOTATION_COMMENT", + "AnnotationType": "ANNOTATION_TYPE", + "SPDXREF": "ANNOTATION_SPDX_ID", + # Relationship fields + "Relationship": "RELATIONSHIP", + "RelationshipComment": "RELATIONSHIP_COMMENT", + # Package fields + "PackageName": "PKG_NAME", + "PackageVersion": "PKG_VERSION", + "PackageDownloadLocation": "PKG_DOWN", + "FilesAnalyzed": "PKG_FILES_ANALYZED", + "PackageSummary": "PKG_SUM", + "PackageSourceInfo": "PKG_SRC_INFO", + "PackageFileName": "PKG_FILE_NAME", + "PackageSupplier": "PKG_SUPPL", + "PackageOriginator": "PKG_ORIG", + "PackageChecksum": "PKG_CHECKSUM", + "PackageVerificationCode": "PKG_VERF_CODE", + "PackageDescription": "PKG_DESC", + "PackageComment": "PKG_COMMENT", + "PackageLicenseDeclared": "PKG_LICS_DECL", + "PackageLicenseConcluded": "PKG_LICS_CONC", + "PackageLicenseInfoFromFiles": "PKG_LICS_FFILE", + "PackageLicenseComments": "PKG_LICS_COMMENT", + "PackageCopyrightText": "PKG_CPY_TEXT", + "PackageHomePage": "PKG_HOME", + "ExternalRef": "PKG_EXT_REF", + "ExternalRefComment": "PKG_EXT_REF_COMMENT", + "PackageAttributionText": "PKG_ATTRIBUTION_TEXT", + "PrimaryPackagePurpose": "PRIMARY_PACKAGE_PURPOSE", + "BuiltDate": "BUILT_DATE", + "ReleaseDate": "RELEASE_DATE", + 
"ValidUntilDate": "VALID_UNTIL_DATE", + # File fields + "FileName": "FILE_NAME", + "FileType": "FILE_TYPE", + "FileChecksum": "FILE_CHECKSUM", + "LicenseConcluded": "FILE_LICS_CONC", + "LicenseInfoInFile": "FILE_LICS_INFO", + "FileCopyrightText": "FILE_CR_TEXT", + "LicenseComments": "FILE_LICS_COMMENT", + "FileComment": "FILE_COMMENT", + "FileNotice": "FILE_NOTICE", + "FileContributor": "FILE_CONTRIB", + "FileAttributionText": "FILE_ATTRIBUTION_TEXT", + # ExtractedLicensingInfo fields + "LicenseID": "LICS_ID", + "ExtractedText": "LICS_TEXT", + "LicenseName": "LICS_NAME", + "LicenseCrossReference": "LICS_CRS_REF", + "LicenseComment": "LICS_COMMENT", + # Snippet fields + "SnippetSPDXID": "SNIPPET_SPDX_ID", + "SnippetName": "SNIPPET_NAME", + "SnippetComment": "SNIPPET_COMMENT", + "SnippetCopyrightText": "SNIPPET_CR_TEXT", + "SnippetLicenseComments": "SNIPPET_LICS_COMMENT", + "SnippetFromFileSPDXID": "SNIPPET_FILE_SPDXID", + "SnippetLicenseConcluded": "SNIPPET_LICS_CONC", + "LicenseInfoInSnippet": "SNIPPET_LICS_INFO", + "SnippetAttributionText": "SNIPPET_ATTRIBUTION_TEXT", + "SnippetByteRange": "SNIPPET_BYTE_RANGE", + "SnippetLineRange": "SNIPPET_LINE_RANGE", + # Common fields + "NOASSERTION": "NO_ASSERTION", + "NONE": "NONE", + "SOURCE": "SOURCE", + "BINARY": "BINARY", + "ARCHIVE": "ARCHIVE", + "APPLICATION": "APPLICATION", + "AUDIO": "AUDIO", + "IMAGE": "IMAGE", + "TEXT": "FILETYPE_TEXT", + "VIDEO": "VIDEO", + "DOCUMENTATION": "DOCUMENTATION", + "SPDX": "SPDX", + "OTHER": "OTHER", + "REVIEW": "REVIEW", + "FRAMEWORK": "FRAMEWORK", + "LIBRARY": "LIBRARY", + "CONTAINER": "CONTAINER", + "OPERATING-SYSTEM": "OPERATING_SYSTEM", + "DEVICE": "DEVICE", + "FIRMWARE": "FIRMWARE", + "FILE": "FILE", + "INSTALL": "INSTALL" + } + states = (("text", "exclusive"),) + + tokens = [ + "TEXT", + "TOOL_VALUE", + "UNKNOWN_TAG", + "ORG_VALUE", + "PERSON_VALUE", + "DATE", + "LINE", + "CHECKSUM", + "DOC_REF_ID", + "DOC_URI", + "EXT_DOC_REF_CHECKSUM", + ] + list(reserved.values()) + + def __init__(self): + self.lexer = None + + @TOKEN(r":\s*") + def t_text(self, t): + t.lexer.text_start = t.lexer.lexpos - len("") + t.lexer.begin("text") + + @TOKEN(r"\s*") + def t_text_end(self, t): + t.type = "TEXT" + t.value = t.lexer.lexdata[t.lexer.text_start: t.lexer.lexpos] + t.lexer.lineno += t.value.count("\n") + t.value = t.value.strip() + t.lexer.begin("INITIAL") + return t + + @TOKEN(r".|\n") + def t_text_any(self, t): + pass + + def t_text_error(self, t): + print("Lexer error in text state") + + @TOKEN( + r":\s*(ADLER32|BLAKE2b-256|BLAKE2b-384|BLAKE2b-512|BLAKE3|MD2|MD4|MD5|MD6|SHA1|SHA224|SHA256|SHA384|SHA512|SHA3-256|SHA3-384|SHA3-512):\s*([a-f0-9]*)") + def t_CHECKSUM(self, t): + t.value = t.value[1:].strip() + return t + + @TOKEN(r":\s*DocumentRef-([A-Za-z0-9\+\.\-]+)") + def t_DOC_REF_ID(self, t): + t.value = t.value[1:].strip() + return t + + @TOKEN(r"\s*((ht|f)tps?:\/\/\S*)") + def t_DOC_URI(self, t): + t.value = t.value.strip() + return t + + @TOKEN(r"\s*SHA1:\s*[a-f0-9]{40}") + def t_EXT_DOC_REF_CHECKSUM(self, t): + t.value = t.value[1:].strip() + return t + + @TOKEN(r":\s*Tool:.+") + def t_TOOL_VALUE(self, t): + t.value = t.value[1:].strip() + return t + + @TOKEN(r":\s*Organization:.+") + def t_ORG_VALUE(self, t): + t.value = t.value[1:].strip() + return t + + @TOKEN(r":\s*Person:.+") + def t_PERSON_VALUE(self, t): + t.value = t.value[1:].strip() + return t + + @TOKEN(r":\s*\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ") + def t_DATE(self, t): + t.value = t.value[1:].strip() + return t + + @TOKEN(r"[a-zA-Z]+") + def 
t_KEYWORD_AS_TAG(self, t): + t.type = self.reserved.get(t.value, "UNKNOWN_TAG") + t.value = t.value.strip() + return t + + @TOKEN(r":.+") + def t_LINE_OR_KEYWORD_VALUE(self, t): + t.value = t.value[1:].strip() + if t.value in self.reserved.keys(): + t.type = self.reserved[t.value] + else: + t.type = "LINE" + return t + + @TOKEN(r"\#.*") + def t_comment(self, t): + pass + + @TOKEN(r"\n+") + def t_newline(self, t): + t.lexer.lineno += len(t.value) + + @TOKEN(r"[ \t]+") + def t_whitespace(self, t): + pass + + def build(self, **kwargs): + self.lexer = lex.lex(module=self, **kwargs) + + def token(self): + return self.lexer.token() + + def input(self, data): + self.lexer.input(data) + + def t_error(self, t): + t.lexer.skip(1) + t.value = "Lexer error" + return t diff --git a/tests/spdx/parser/tagvalue/__init__.py b/tests/spdx/parser/tagvalue/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/spdx/parser/tagvalue/test_tag_value_lexer.py b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py new file mode 100644 index 000000000..ce6b9a159 --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py @@ -0,0 +1,288 @@ +# Copyright (c) 2014 Ahmed H. Ismail +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
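+
+# Illustrative sketch of how SPDXLexer is driven (hypothetical usage, not part of the test
+# suite below; it assumes only that ply is installed and mirrors the fixture that follows):
+#
+#     lexer = SPDXLexer()
+#     lexer.build()                        # construct the underlying ply lexer
+#     lexer.input("SPDXVersion: SPDX-2.1")
+#     lexer.token()                        # type "DOC_VERSION", value "SPDXVersion"
+#     lexer.token()                        # type "LINE", value "SPDX-2.1"
+#     lexer.token()                        # None once the input is exhausted
+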
+from unittest import TestCase + +import pytest + +from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer + + +@pytest.fixture +def lexer(): + lexer = SPDXLexer() + lexer.build() + return lexer + + +def token_assert_helper(token, token_type, value, line_number): + assert token.type == token_type + assert token.value == value + assert token.lineno == line_number + + +def test_tokenization_of_document(lexer): + document_str = '\n'.join([ + 'SPDXVersion: SPDX-2.1', + 'DataLicense: CC0-1.0', + 'DocumentName: Sample_Document-V2.1', + 'SPDXID: SPDXRef-DOCUMENT', + 'DocumentComment: Sample Comment', + 'DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301' + ]) + lexer.input(document_str) + token_assert_helper(lexer.token(), 'DOC_VERSION', 'SPDXVersion', 1) + token_assert_helper(lexer.token(), 'LINE', 'SPDX-2.1', 1) + token_assert_helper(lexer.token(), 'DOC_LICENSE', 'DataLicense', 2) + token_assert_helper(lexer.token(), 'LINE', 'CC0-1.0', 2) + token_assert_helper(lexer.token(), 'DOC_NAME', 'DocumentName', 3) + token_assert_helper(lexer.token(), 'LINE', 'Sample_Document-V2.1', 3) + token_assert_helper(lexer.token(), 'SPDX_ID', 'SPDXID', 4) + token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-DOCUMENT', 4) + token_assert_helper(lexer.token(), 'DOC_COMMENT', 'DocumentComment', 5) + token_assert_helper(lexer.token(), 'TEXT', 'Sample Comment', 5) + token_assert_helper(lexer.token(), 'DOC_NAMESPACE', 'DocumentNamespace', 6) + token_assert_helper(lexer.token(), 'LINE', + 'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301', 6) + + +def test_tokenization_of_external_document_references(lexer): + data = ''' + ExternalDocumentRef:DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759 + ''' + lexer.input(data) + token_assert_helper(lexer.token(), 'EXT_DOC_REF', 'ExternalDocumentRef', 2) + token_assert_helper(lexer.token(), 'DOC_REF_ID', 'DocumentRef-spdx-tool-2.1', 2) + token_assert_helper(lexer.token(), 'DOC_URI', 'http://spdx.org/spdxdocs/spdx-tools-v2.1-3F25' + '04E0-4F89-41D3-9A0C-0305E82C3301', 2) + token_assert_helper(lexer.token(), 'EXT_DOC_REF_CHECKSUM', 'SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759', 2) + + +def test_tokenization_of_file(lexer): + file_str = '\n'.join([ + 'FileName: testfile.java', + 'SPDXID: SPDXRef-File', + 'FileType: SOURCE', + 'FileType: TEXT', + 'FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', + 'LicenseConcluded: Apache-2.0', + 'LicenseInfoInFile: Apache-2.0', + 'FileCopyrightText: Copyright 2014 Acme Inc.', + 'FileComment: Very long file', + 'FileAttributionText: Acknowledgements that might be required to be communicated in some contexts.' 
+ ]) + + lexer.input(file_str) + token_assert_helper(lexer.token(), 'FILE_NAME', 'FileName', 1) + token_assert_helper(lexer.token(), 'LINE', 'testfile.java', 1) + token_assert_helper(lexer.token(), 'SPDX_ID', 'SPDXID', 2) + token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-File', 2) + token_assert_helper(lexer.token(), 'FILE_TYPE', 'FileType', 3) + token_assert_helper(lexer.token(), 'SOURCE', 'SOURCE', 3) + token_assert_helper(lexer.token(), 'FILE_TYPE', 'FileType', 4) + token_assert_helper(lexer.token(), 'FILETYPE_TEXT', 'TEXT', 4) + token_assert_helper(lexer.token(), 'FILE_CHECKSUM', 'FileChecksum', 5) + token_assert_helper(lexer.token(), 'CHECKSUM', 'SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', 5) + token_assert_helper(lexer.token(), 'FILE_LICS_CONC', 'LicenseConcluded', 6) + token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 6) + token_assert_helper(lexer.token(), 'FILE_LICS_INFO', 'LicenseInfoInFile', 7) + token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 7) + token_assert_helper(lexer.token(), 'FILE_CR_TEXT', 'FileCopyrightText', 8) + token_assert_helper(lexer.token(), 'TEXT', 'Copyright 2014 Acme Inc.', 8) + token_assert_helper(lexer.token(), 'FILE_COMMENT', 'FileComment', 9) + token_assert_helper(lexer.token(), 'TEXT', 'Very long file', 9) + token_assert_helper(lexer.token(), 'FILE_ATTRIBUTION_TEXT', 'FileAttributionText', 10) + token_assert_helper(lexer.token(), 'TEXT', + 'Acknowledgements that might be required to be communicated in some contexts.', + 10) + + +def test_tokenization_of_creation_info(lexer): + creation_str = '\n'.join([ + 'Creator: Person: Bob (bob@example.com)', + 'Creator: Organization: Acme.', + 'Created: 2010-02-03T00:00:00Z', + 'CreatorComment: Sample Comment' + ]) + + lexer.input(creation_str) + token_assert_helper(lexer.token(), 'CREATOR', 'Creator', 1) + token_assert_helper(lexer.token(), 'PERSON_VALUE', "Person: Bob (bob@example.com)", 1) + token_assert_helper(lexer.token(), 'CREATOR', 'Creator', 2) + token_assert_helper(lexer.token(), 'ORG_VALUE', 'Organization: Acme.', 2) + token_assert_helper(lexer.token(), 'CREATED', 'Created', 3) + token_assert_helper(lexer.token(), 'DATE', '2010-02-03T00:00:00Z', 3) + token_assert_helper(lexer.token(), 'CREATOR_COMMENT', 'CreatorComment', 4) + token_assert_helper(lexer.token(), 'TEXT', 'Sample Comment', 4) + + +def test_tokenization_of_package(lexer): + package_str = '\n'.join([ + 'PackageName: Test', + 'SPDXID: SPDXRef-Package', + 'PackageVersion: Version 0.9.2', + 'PackageDownloadLocation: http://example.com/test', + 'FilesAnalyzed: True', + 'PackageSummary: Test package', + 'PackageSourceInfo: Version 1.0 of test', + 'PackageFileName: test-1.0.zip', + 'PackageSupplier: Organization:ACME', + 'PackageOriginator: Organization:ACME', + 'PackageChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', + 'PackageVerificationCode: 4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)', + 'PackageDescription: A package.', + 'PackageComment: Comment on the package.', + 'PackageCopyrightText: Copyright 2014 Acme Inc.', + 'PackageLicenseDeclared: Apache-2.0', + 'PackageLicenseConcluded: (LicenseRef-2.0 and Apache-2.0)', + 'PackageLicenseInfoFromFiles: Apache-1.0', + 'PackageLicenseInfoFromFiles: Apache-2.0', + 'PackageLicenseComments: License Comments', + 'ExternalRef: SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:', + 'ExternalRefComment: Some comment about the package.', + 'PrimaryPackagePurpose: OPERATING-SYSTEM', + 'BuiltDate: 2020-01-01T12:00:00Z', 
+ 'ReleaseDate: 2021-01-01T12:00:00Z', + 'ValidUntilDate: 2022-01-01T12:00:00Z' + ]) + + lexer.input(package_str) + token_assert_helper(lexer.token(), 'PKG_NAME', 'PackageName', 1) + token_assert_helper(lexer.token(), 'LINE', 'Test', 1) + token_assert_helper(lexer.token(), 'SPDX_ID', 'SPDXID', 2) + token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-Package', 2) + token_assert_helper(lexer.token(), 'PKG_VERSION', 'PackageVersion', 3) + token_assert_helper(lexer.token(), 'LINE', 'Version 0.9.2', 3) + token_assert_helper(lexer.token(), 'PKG_DOWN', 'PackageDownloadLocation', 4) + token_assert_helper(lexer.token(), 'LINE', 'http://example.com/test', 4) + token_assert_helper(lexer.token(), 'PKG_FILES_ANALYZED', 'FilesAnalyzed', 5) + token_assert_helper(lexer.token(), 'LINE', 'True', 5) + token_assert_helper(lexer.token(), 'PKG_SUM', 'PackageSummary', 6) + token_assert_helper(lexer.token(), 'TEXT', 'Test package', 6) + token_assert_helper(lexer.token(), 'PKG_SRC_INFO', 'PackageSourceInfo', 7) + token_assert_helper(lexer.token(), 'TEXT', 'Version 1.0 of test', 7) + token_assert_helper(lexer.token(), 'PKG_FILE_NAME', 'PackageFileName', 8) + token_assert_helper(lexer.token(), 'LINE', 'test-1.0.zip', 8) + token_assert_helper(lexer.token(), 'PKG_SUPPL', 'PackageSupplier', 9) + token_assert_helper(lexer.token(), 'ORG_VALUE', 'Organization:ACME', 9) + token_assert_helper(lexer.token(), 'PKG_ORIG', 'PackageOriginator', 10) + token_assert_helper(lexer.token(), 'ORG_VALUE', 'Organization:ACME', 10) + token_assert_helper(lexer.token(), 'PKG_CHECKSUM', 'PackageChecksum', 11) + token_assert_helper(lexer.token(), 'CHECKSUM', 'SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', 11) + token_assert_helper(lexer.token(), 'PKG_VERF_CODE', 'PackageVerificationCode', 12) + token_assert_helper(lexer.token(), 'LINE', + '4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)', 12) + token_assert_helper(lexer.token(), 'PKG_DESC', 'PackageDescription', 13) + token_assert_helper(lexer.token(), 'TEXT', 'A package.', 13) + token_assert_helper(lexer.token(), 'PKG_COMMENT', 'PackageComment', 14) + token_assert_helper(lexer.token(), 'TEXT', 'Comment on the package.', 14) + token_assert_helper(lexer.token(), 'PKG_CPY_TEXT', 'PackageCopyrightText', 15) + token_assert_helper(lexer.token(), 'TEXT', ' Copyright 2014 Acme Inc.', 15) + token_assert_helper(lexer.token(), 'PKG_LICS_DECL', 'PackageLicenseDeclared', 16) + token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 16) + token_assert_helper(lexer.token(), 'PKG_LICS_CONC', 'PackageLicenseConcluded', 17) + token_assert_helper(lexer.token(), 'LINE', '(LicenseRef-2.0 and Apache-2.0)', 17) + token_assert_helper(lexer.token(), 'PKG_LICS_FFILE', 'PackageLicenseInfoFromFiles', 18) + token_assert_helper(lexer.token(), 'LINE', 'Apache-1.0', 18) + token_assert_helper(lexer.token(), 'PKG_LICS_FFILE', 'PackageLicenseInfoFromFiles', 19) + token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 19) + token_assert_helper(lexer.token(), 'PKG_LICS_COMMENT', 'PackageLicenseComments', 20) + token_assert_helper(lexer.token(), 'TEXT', 'License Comments', 20) + token_assert_helper(lexer.token(), 'PKG_EXT_REF', 'ExternalRef', 21) + token_assert_helper(lexer.token(), 'LINE', + 'SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:', 21) + token_assert_helper(lexer.token(), 'PKG_EXT_REF_COMMENT', 'ExternalRefComment', 22) + token_assert_helper(lexer.token(), 'TEXT', 'Some comment about the package.', 22) + token_assert_helper(lexer.token(), 
'PRIMARY_PACKAGE_PURPOSE', 'PrimaryPackagePurpose', 23) + token_assert_helper(lexer.token(), 'OPERATING_SYSTEM', 'OPERATING-SYSTEM', 23) + token_assert_helper(lexer.token(), 'BUILT_DATE', 'BuiltDate', 24) + token_assert_helper(lexer.token(), 'DATE', '2020-01-01T12:00:00Z', 24) + token_assert_helper(lexer.token(), 'RELEASE_DATE', 'ReleaseDate', 25) + token_assert_helper(lexer.token(), 'DATE', '2021-01-01T12:00:00Z', 25) + token_assert_helper(lexer.token(), 'VALID_UNTIL_DATE', 'ValidUntilDate', 26) + token_assert_helper(lexer.token(), 'DATE', '2022-01-01T12:00:00Z', 26) + + +def test_tokenization_of_unknown_tag(lexer): + unknown_tag_str = 'SomeUnknownTag: SomeUnknownValue' + lexer.input(unknown_tag_str) + token_assert_helper(lexer.token(), 'UNKNOWN_TAG', 'SomeUnknownTag', 1) + token_assert_helper(lexer.token(), 'LINE', 'SomeUnknownValue', 1) + + +def test_tokenization_of_snippet(lexer): + snippet_str = '\n'.join([ + 'SnippetSPDXID: SPDXRef-Snippet', + 'SnippetLicenseComments: Some lic comment.', + 'SnippetCopyrightText: Copyright 2008-2010 John Smith ', + 'SnippetComment: Some snippet comment.', + 'SnippetName: from linux kernel', + 'SnippetFromFileSPDXID: SPDXRef-DoapSource', + 'SnippetLicenseConcluded: Apache-2.0', + 'LicenseInfoInSnippet: Apache-2.0', + 'SnippetByteRange: 310:420', + 'SnippetLineRange: 5:23', + ]) + lexer.input(snippet_str) + token_assert_helper(lexer.token(), 'SNIPPET_SPDX_ID', 'SnippetSPDXID', 1) + token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-Snippet', 1) + token_assert_helper(lexer.token(), 'SNIPPET_LICS_COMMENT', 'SnippetLicenseComments', 2) + token_assert_helper(lexer.token(), 'TEXT', 'Some lic comment.', 2) + token_assert_helper(lexer.token(), 'SNIPPET_CR_TEXT', 'SnippetCopyrightText', 3) + token_assert_helper(lexer.token(), 'TEXT', ' Copyright 2008-2010 John Smith ', 3) + token_assert_helper(lexer.token(), 'SNIPPET_COMMENT', 'SnippetComment', 4) + token_assert_helper(lexer.token(), 'TEXT', 'Some snippet comment.', 4) + token_assert_helper(lexer.token(), 'SNIPPET_NAME', 'SnippetName', 5) + token_assert_helper(lexer.token(), 'LINE', 'from linux kernel', 5) + token_assert_helper(lexer.token(), 'SNIPPET_FILE_SPDXID', 'SnippetFromFileSPDXID', 6) + token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-DoapSource', 6) + token_assert_helper(lexer.token(), 'SNIPPET_LICS_CONC', + 'SnippetLicenseConcluded', 7) + token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 7) + token_assert_helper(lexer.token(), 'SNIPPET_LICS_INFO', 'LicenseInfoInSnippet', 8) + token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 8) + token_assert_helper(lexer.token(), 'SNIPPET_BYTE_RANGE', 'SnippetByteRange', 9) + token_assert_helper(lexer.token(), 'LINE', '310:420', 9) + token_assert_helper(lexer.token(), 'SNIPPET_LINE_RANGE', 'SnippetLineRange', 10) + token_assert_helper(lexer.token(), 'LINE', '5:23', 10) + + +def test_tokenization_of_annotation(lexer): + annotation_str = '\n'.join([ + 'Annotator: Person: Jane Doe()', + 'AnnotationDate: 2010-01-29T18:30:22Z', + 'AnnotationComment: Document level annotation', + 'AnnotationType: OTHER', + 'SPDXREF: SPDXRef-DOCUMENT' + ]) + + lexer.input(annotation_str) + token_assert_helper(lexer.token(), 'ANNOTATOR', 'Annotator', 1) + token_assert_helper(lexer.token(), 'PERSON_VALUE', 'Person: Jane Doe()', 1) + token_assert_helper(lexer.token(), 'ANNOTATION_DATE', 'AnnotationDate', 2) + token_assert_helper(lexer.token(), 'DATE', '2010-01-29T18:30:22Z', 2) + token_assert_helper(lexer.token(), 'ANNOTATION_COMMENT', 'AnnotationComment', 3) + 
token_assert_helper(lexer.token(), 'TEXT', 'Document level annotation', 3) + token_assert_helper(lexer.token(), 'ANNOTATION_TYPE', 'AnnotationType', 4) + token_assert_helper(lexer.token(), 'OTHER', 'OTHER', 4) + token_assert_helper(lexer.token(), 'ANNOTATION_SPDX_ID', 'SPDXREF', 5) + token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-DOCUMENT', 5) + + +def test_tokenization_of_relationship(lexer): + relationship_str = '\n'.join(['Relationship: SPDXRef-DOCUMENT DESCRIBES NONE', + 'RelationshipComment: This is a comment.']) + + lexer.input(relationship_str) + token_assert_helper(lexer.token(), 'RELATIONSHIP', 'Relationship', 1) + token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-DOCUMENT DESCRIBES NONE', 1) + token_assert_helper(lexer.token(), 'RELATIONSHIP_COMMENT', 'RelationshipComment', 2) + token_assert_helper(lexer.token(), 'LINE', 'This is a comment.', 2) From 4e5ec6864ab677e3f77ceab740b66c8507919e70 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Mon, 27 Feb 2023 12:02:11 +0100 Subject: [PATCH 02/43] [issue-382] add tag-value parser The code is taken from the current implementation, I added a decorator function to use instead of docstrings and adapted the code according to the new data model. Signed-off-by: Meret Behrens --- .gitignore | 2 +- README.md | 1 + pyproject.toml | 2 +- src/spdx/model/package.py | 6 +- src/spdx/parser/tagvalue/parser/__init__.py | 0 .../parser/tagvalue/parser/helper_methods.py | 31 + src/spdx/parser/tagvalue/parser/tagvalue.py | 907 ++++++++++++++++++ .../parser/tagvalue/test_tag_value_parser.py | 246 +++++ 8 files changed, 1190 insertions(+), 5 deletions(-) create mode 100644 src/spdx/parser/tagvalue/parser/__init__.py create mode 100644 src/spdx/parser/tagvalue/parser/helper_methods.py create mode 100644 src/spdx/parser/tagvalue/parser/tagvalue.py create mode 100644 tests/spdx/parser/tagvalue/test_tag_value_parser.py diff --git a/.gitignore b/.gitignore index cc02c2461..5ef28e630 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ __pycache__/ /build/ /dist/ /tmp/ -spdx/parsers/parsetab.py +src/spdx/parser/tagvalue/parser/parsetab.py /.cache/ .tox diff --git a/README.md b/README.md index 9416994b9..e1681a7c2 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,7 @@ if not validation_messages: * PyYAML: https://pypi.org/project/PyYAML/ for handling YAML. * xmltodict: https://pypi.org/project/xmltodict/ for handling XML. * rdflib: https://pypi.python.org/pypi/rdflib/ for handling RDF. +* ply: https://pypi.org/project/ply/ for handling tag-value. * click: https://pypi.org/project/click/ for creating the CLI interface. * typeguard: https://pypi.org/project/typeguard/ for type checking. * uritools: https://pypi.org/project/uritools/ for validation of URIs. 
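
The decorator mentioned in the commit message relies on ply.yacc discovering grammar
productions by reading the docstrings of the p_* methods; grammar_rule (added in
helper_methods.py below) simply assigns the production string to func.__doc__. The
following is a minimal, self-contained sketch of that mechanism using a hypothetical
one-token grammar, not the parser introduced by this patch; it assumes only that ply
is installed:

    from ply import lex, yacc

    # hypothetical single-token grammar, just to show the decorator at work
    tokens = ["NAME"]
    t_NAME = r"[A-Za-z]+"
    t_ignore = " "

    def t_error(t):
        t.lexer.skip(1)

    def grammar_rule(doc):
        # same idea as the helper below: attach the production as the docstring ply.yacc reads
        def decorate(func):
            func.__doc__ = doc
            return func
        return decorate

    @grammar_rule("start : NAME")
    def p_start(p):
        p[0] = p[1]

    def p_error(p):
        pass

    lexer = lex.lex()
    parser = yacc.yacc()
    print(parser.parse("SPDXVersion", lexer=lexer))  # prints: SPDXVersion

Building the parser generates a parsetab.py table cache, which is what the .gitignore
change above accounts for.
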
diff --git a/pyproject.toml b/pyproject.toml
index 282f56d38..933f1d264 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ classifiers = [
 ]
 urls = { Homepage = "https://github.com/spdx/tools-python" }
 requires-python = ">=3.7"
-dependencies = ["click", "pyyaml", "xmltodict", "rdflib", "typeguard", "uritools", "license_expression"]
+dependencies = ["click", "pyyaml", "xmltodict", "rdflib", "typeguard", "uritools", "license_expression", "ply"]
 dynamic = ["version"]

 [project.optional-dependencies]
diff --git a/src/spdx/model/package.py b/src/spdx/model/package.py
index 7f4e32296..a6a4eaf4c 100644
--- a/src/spdx/model/package.py
+++ b/src/spdx/model/package.py
@@ -55,9 +55,9 @@ class ExternalPackageRefCategory(Enum):

 CATEGORY_TO_EXTERNAL_PACKAGE_REF_TYPES: Dict[ExternalPackageRefCategory, List[str]] = {
-    ExternalPackageRefCategory.SECURITY : ["cpe22Type", "cpe23Type", "advisory", "fix", "url", "swid"],
-    ExternalPackageRefCategory.PACKAGE_MANAGER : ["maven-central", "npm", "nuget", "bower", "purl"],
-    ExternalPackageRefCategory.PERSISTENT_ID : ["swh", "gitoid"],
+    ExternalPackageRefCategory.SECURITY: ["cpe22Type", "cpe23Type", "advisory", "fix", "url", "swid"],
+    ExternalPackageRefCategory.PACKAGE_MANAGER: ["maven-central", "npm", "nuget", "bower", "purl"],
+    ExternalPackageRefCategory.PERSISTENT_ID: ["swh", "gitoid"],
     ExternalPackageRefCategory.OTHER: []
 }

diff --git a/src/spdx/parser/tagvalue/parser/__init__.py b/src/spdx/parser/tagvalue/parser/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/spdx/parser/tagvalue/parser/helper_methods.py b/src/spdx/parser/tagvalue/parser/helper_methods.py
new file mode 100644
index 000000000..32090810a
--- /dev/null
+++ b/src/spdx/parser/tagvalue/parser/helper_methods.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2023 spdx contributors
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Optional
+
+
+def grammar_rule(doc):
+    # this is a helper method to use decorators for the parsing methods instead of docstrings
+    def decorate(func):
+        func.__doc__ = doc
+        return func
+    return decorate
+
+
+def str_from_text(text: Optional[str]) -> Optional[str]:
+    regex = re.compile("<text>((.|\n)+)</text>", re.UNICODE)
+    match = regex.match(text)
+    if match:
+        return match.group(1)
+    elif isinstance(text, str):
+        return text
+    else:
+        return None
diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py
new file mode 100644
index 000000000..7909f35d9
--- /dev/null
+++ b/src/spdx/parser/tagvalue/parser/tagvalue.py
@@ -0,0 +1,907 @@
+# Copyright (c) 2014 Ahmed H. Ismail
+# Copyright (c) 2023 spdx contributors
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from typing import Optional + +from license_expression import get_spdx_licensing +from ply import yacc + +from spdx.datetime_conversions import datetime_from_str +from spdx.model.annotation import AnnotationType, Annotation +from spdx.model.checksum import ChecksumAlgorithm, Checksum +from spdx.model.external_document_ref import ExternalDocumentRef +from spdx.model.extracted_licensing_info import ExtractedLicensingInfo +from spdx.model.package import Package, PackageVerificationCode, PackagePurpose, ExternalPackageRef, \ + ExternalPackageRefCategory +from spdx.model.relationship import Relationship, RelationshipType +from spdx.model.snippet import Snippet +from spdx.model.version import Version +from spdx.parser.jsonlikedict.actor_parser import ActorParser + +from spdx.model.document import Document, CreationInfo +from spdx.model.file import File, FileType +from spdx.model.spdx_no_assertion import SpdxNoAssertion +from spdx.model.spdx_none import SpdxNone +from spdx.parser.jsonlikedict.dict_parsing_functions import construct_or_raise_parsing_error +from spdx.parser.logger import Logger +from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer +from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text + + +class Parser(object): + def __init__(self): + self.lex = None + self.yacc = None + self.tokens = SPDXLexer.tokens + self.logger = Logger() + self.element_stack = [] + self.current_element = dict() + self.creation_info = dict() + self.elements_build = dict() + + @grammar_rule("start : start attrib ") + def p_start_1(self, p): + pass + + @grammar_rule("start : attrib ") + def p_start_2(self, p): + pass + + @grammar_rule("attrib : spdx_version\n| spdx_id\n| data_lics\n| doc_name\n| doc_comment\n| doc_namespace\n| " + "creator\n| created\n| creator_comment\n| lics_list_ver\n| ext_doc_ref\n" + # attributes for file + "| file_name\n| file_type\n| file_checksum\n| file_conc\n| file_lics_info\n| file_cr_text\n" + "| file_lics_comment\n| file_attribution_text\n| file_notice\n| file_comment\n| file_contrib\n" + # attributes for annotation + "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n" + # attributes for relationship + "| relationship\n| relationship_comment\n" + # attributes for snippet + "| snip_spdx_id\n| snip_name\n| snip_comment\n| snippet_attribution_text\n| snip_cr_text\n" + "| snip_lic_comment\n| snip_file_spdx_id\n| snip_lics_conc\n| snip_lics_info\n| snip_byte_range\n" + "| snip_line_range\n" + # attributes for package + "| package_name\n| package_version\n| pkg_down_location\n| pkg_files_analyzed\n| pkg_home\n" + "| pkg_summary\n| pkg_src_info\n| pkg_file_name\n| pkg_supplier\n| pkg_orig\n| pkg_checksum\n" + "| pkg_verif\n| pkg_desc\n| pkg_comment\n| pkg_attribution_text\n| pkg_lic_decl\n| pkg_lic_conc\n" + "| pkg_lic_ff\n| pkg_lic_comment\n| pkg_cr_text\n| pkg_ext_ref\n| primary_package_purpose\n" + "| built_date\n| release_date\n| valid_until_date\n" + # attributes for extracted licensing info + "| extr_lic_id\n| extr_lic_text\n| extr_lic_name\n| lic_xref\n| lic_comment\n" + "| unknown_tag ") 
+ def p_attrib(self, p): + pass + + # general parsing methods + @grammar_rule("unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG DATE\n | UNKNOWN_TAG PERSON_VALUE") + def p_unknown_tag(self, p): + self.logger.append(f"Unknown tag provided in line {p.lineno(1)}") + + @grammar_rule("text_or_line : TEXT") + def p_text_or_line_value_1(self, p): + p[0] = str_from_text(p[1]) + + @grammar_rule("text_or_line : LINE") + def p_text_or_line_value_2(self, p): + p[0] = p[1] + + @grammar_rule("license_or_no_assertion_or_none : NO_ASSERTION") + def p_license_or_no_assertion_or_none_1(self, p): + p[0] = SpdxNoAssertion() + + @grammar_rule("license_or_no_assertion_or_none : NONE") + def p_license_or_no_assertion_or_none_2(self, p): + p[0] = SpdxNone() + + @grammar_rule("license_or_no_assertion_or_none : LINE") + def p_license_or_no_assertion_or_none_3(self, p): + p[0] = get_spdx_licensing().parse(p[1]) + + @grammar_rule("line_or_no_assertion : LINE") + def p_line_or_no_assertion_1(self, p): + p[0] = p[1] + + @grammar_rule("line_or_no_assertion : NO_ASSERTION") + def p_line_or_no_assertion_2(self, p): + p[0] = SpdxNoAssertion() + + @grammar_rule("line_or_no_assertion_or_none : text_or_line") + def p_line_1(self, p): + p[0] = p[1] + + @grammar_rule("line_or_no_assertion_or_none : NO_ASSERTION") + def p_no_assertion_2(self, p): + p[0] = SpdxNoAssertion() + + @grammar_rule("line_or_no_assertion_or_none : NONE") + def p_none_2(self, p): + p[0] = SpdxNoAssertion() + + @grammar_rule("spdx_id : SPDX_ID LINE") + def p_spdx_id(self, p): + # We assume that the documents spdx_id is defined first in the SPDXDocument, before any package or file + # information. If this is not the case the parser will behave unexpectedly as the spdx_ids are assigned falsy. + if "spdx_id" in self.creation_info: + self.current_element["spdx_id"] = p[2] + else: + self.creation_info["spdx_id"] = p[2] + + # parsing methods for creation info / document level + + @grammar_rule("lics_list_ver : LIC_LIST_VER LINE") + def p_lics_list_ver_1(self, p): + self.creation_info["license_list_version"] = Version.from_string(p[2]) + + @grammar_rule("lics_list_ver : LIC_LIST_VER error") + def p_lics_list_ver_2(self, p): + self.logger.append( + f"Error while parsing LicenseListVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("doc_comment : DOC_COMMENT text_or_line") + def p_doc_comment_1(self, p): + self.creation_info["document_comment"] = p[2] + + @grammar_rule("doc_comment : DOC_COMMENT error") + def p_doc_comment_2(self, p): + self.logger.append( + f"Error while parsing DocumentComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("doc_namespace : DOC_NAMESPACE LINE") + def p_doc_namespace_1(self, p): + self.creation_info["document_namespace"] = p[2] + + @grammar_rule("doc_namespace : DOC_NAMESPACE error") + def p_doc_namespace_2(self, p): + self.logger.append( + f"Error while parsing DocumentNamespace: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("data_lics : DOC_LICENSE LINE") + def p_data_license_1(self, p): + self.creation_info["data_license"] = p[2] + + @grammar_rule("data_lics : DOC_LICENSE error") + def p_data_license_2(self, p): + self.logger.append( + f"Error while parsing DataLicense: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") + + @grammar_rule("doc_name : DOC_NAME LINE") + def p_doc_name_1(self, p): + self.creation_info["name"] = p[2] + + @grammar_rule("doc_name : DOC_NAME error") + def p_doc_name_2(self, p): + self.logger.append( + f"Error while parsing DocumentName: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("ext_doc_ref : EXT_DOC_REF DOC_REF_ID DOC_URI EXT_DOC_REF_CHECKSUM") + def p_ext_doc_refs_1(self, p): + + document_ref_id = p[2] + document_uri = p[3] + splitted_checksum = p[4].split(":") + algorithm = ChecksumAlgorithm[splitted_checksum[0]] + value = splitted_checksum[1].strip() + external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, Checksum(algorithm, value)) + self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) + + @grammar_rule("ext_doc_ref : EXT_DOC_REF error") + def p_ext_doc_refs_2(self, p): + self.logger.append( + f"Error while parsing ExternalDocumentRef: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("spdx_version : DOC_VERSION LINE") + def p_spdx_version_1(self, p): + self.creation_info["spdx_version"] = p[2] + + @grammar_rule("spdx_version : DOC_VERSION error") + def p_spdx_version_2(self, p): + self.logger.append( + f"Error while parsing SPDXVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("creator_comment : CREATOR_COMMENT text_or_line") + def p_creator_comment_1(self, p): + self.creation_info["creator_comment"] = p[2] + + @grammar_rule("creator_comment : CREATOR_COMMENT error") + def p_creator_comment_2(self, p): + self.logger.append( + f"Error while parsing CreatorComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + def p_creator_1(self, p): + """creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORG_VALUE""" + self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2])) + + @grammar_rule("creator : CREATOR error") + def p_creator_2(self, p): + self.logger.append( + f"Error while parsing Creator: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("created : CREATED DATE") + def p_created_1(self, p): + self.creation_info["created"] = datetime_from_str(p[2]) + + @grammar_rule("created : CREATED error") + def p_created_2(self, p): + self.logger.append( + f"Error while parsing Created: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + # parsing methods for extracted licensing info + + @grammar_rule("extr_lic_id : LICS_ID LINE") + def p_extr_lic_id_1(self, p): + self.construct_current_element() + self.current_element["class"] = ExtractedLicensingInfo + self.current_element["license_id"] = p[2] + + @grammar_rule("extr_lic_id : LICS_ID error") + def p_extr_lic_id_2(self, p): + self.logger.append( + f"Error while parsing LicenseID: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("lic_xref : LICS_CRS_REF LINE") + def p_lic_xref_1(self, p): + self.current_element.setdefault("cross_references", []).append(p[2]) + + @grammar_rule("lic_xref : LICS_CRS_REF error") + def p_lic_xref_2(self, p): + self.logger.append(f"Error while parsing LicenseCrossReference: Token did not match specified grammar rule. 
" + f"Line: {p.lineno(1)}") + + @grammar_rule("lic_comment : LICS_COMMENT text_or_line") + def p_lic_comment_1(self, p): + self.current_element["comment"] = p[2] + + @grammar_rule("lic_comment : LICS_COMMENT error") + def p_lic_comment_2(self, p): + self.logger.append( + f"Error while parsing LicenseComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("extr_lic_name : LICS_NAME line_or_no_assertion") + def p_extr_lic_name_1(self, p): + self.current_element["license_name"] = p[2] + + @grammar_rule("extr_lic_name : LICS_NAME error") + def p_extr_lic_name_2(self, p): + self.logger.append( + f"Error while parsing LicenseName: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("extr_lic_text : LICS_TEXT text_or_line") + def p_extr_lic_text_1(self, p): + self.current_element["extracted_text"] = p[2] + + @grammar_rule("extr_lic_text : LICS_TEXT error") + def p_extr_lic_text_2(self, p): + self.logger.append( + f"Error while parsing ExtractedText: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + # parsing methods for file + + @grammar_rule("file_name : FILE_NAME LINE") + def p_file_name_1(self, p): + self.construct_current_element() + self.element_stack.append(p[2]) + self.current_element = dict() + self.current_element["name"] = p[2] + self.current_element["class"] = File + + @grammar_rule("file_name : FILE_NAME error") + def p_file_name_2(self, p): + self.logger.append( + f"Error while parsing FileName: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("file_contrib : FILE_CONTRIB LINE") + def p_file_contrib_1(self, p): + self.current_element.setdefault("contributors", []).append(p[2]) + + @grammar_rule("file_contrib : FILE_CONTRIB error") + def p_file_contrib_2(self, p): + self.logger.append( + f"Error while parsing FileContributor: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("file_notice : FILE_NOTICE text_or_line") + def p_file_notice_1(self, p): + self.current_element["notice"] = p[2] + + @grammar_rule("file_notice : FILE_NOTICE error") + def p_file_notice_2(self, p): + self.logger.append( + f"Error while parsing FileNotice: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none") + def p_file_cr_text_1(self, p): + + self.current_element["copyright_text"] = p[2] + + @grammar_rule("file_cr_text : FILE_CR_TEXT error") + def p_file_cr_text_2(self, p): + self.logger.append( + f"Error while parsing FileCopyrightText: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("file_lics_comment : FILE_LICS_COMMENT text_or_line") + def p_file_lics_comment_1(self, p): + self.current_element["license_comment"] = p[2] + + @grammar_rule("file_lics_comment : FILE_LICS_COMMENT error") + def p_file_lics_comment_2(self, p): + self.logger.append(f"Error while parsing LicenseComments in file: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line") + def p_file_attribution_text_1(self, p): + self.current_element.setdefault("attribution_texts", []).append(p[2]) + + @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT error") + def p_file_attribution_text_2(self, p): + self.logger.append( + f"Error while parsing FileAttributionText: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") + + @grammar_rule("file_lics_info : FILE_LICS_INFO license_or_no_assertion_or_none") + def p_file_lics_info_1(self, p): + if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): + self.current_element["license_info_in_file"] = p[2] + return + self.current_element.setdefault("license_info_in_file", []).append(p[2]) + + @grammar_rule("file_lics_info : FILE_LICS_INFO error") + def p_file_lics_info_2(self, p): + self.logger.append( + f"Error while parsing LicenseInfoInFile: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("file_comment : FILE_COMMENT text_or_line") + def p_file_comment_1(self, p): + self.current_element["comment"] = p[2] + + @grammar_rule("file_comment : FILE_COMMENT error") + def p_file_comment_2(self, p): + self.logger.append( + f"Error while parsing FileComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("file_type : FILE_TYPE file_type_value") + def p_file_type_1(self, p): + self.current_element.setdefault("file_type", []).append(FileType[p[2]]) + + @grammar_rule("file_type : FILE_TYPE error") + def p_file_type_2(self, p): + self.logger.append( + f"Error while parsing FileType: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") + def p_file_checksum_1(self, p): + splitted_checksum = p[2].split(":") + algorithm = ChecksumAlgorithm[splitted_checksum[0]] + value = splitted_checksum[1] + self.current_element.setdefault("checksums", []).append(Checksum(algorithm, value)) + + @grammar_rule("file_checksum : FILE_CHECKSUM error") + def p_file_checksum_2(self, p): + self.logger.append( + f"Error while parsing Checksum in file: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("file_conc : FILE_LICS_CONC license_or_no_assertion_or_none") + def p_file_conc_1(self, p): + self.current_element["license_concluded"] = p[2] + + @grammar_rule("file_conc : FILE_LICS_CONC error") + def p_file_conc_2(self, p): + self.logger.append(f"Error while parsing LicenseConcluded in file: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule( + "file_type_value : SOURCE\n| BINARY\n| ARCHIVE\n | APPLICATION\n | AUDIO\n | IMAGE\n | FILETYPE_TEXT\n| VIDEO\n" + " | DOCUMENTATION\n| SPDX \n| OTHER ") + def p_file_type_value(self, p): + + p[0] = p[1] + + # parsing methods for package + + @grammar_rule("package_name : PKG_NAME LINE") + def p_package_name(self, p): + self.construct_current_element() + self.current_element["class"] = Package + self.current_element["name"] = p[2] + + @grammar_rule("package_name : PKG_NAME error") + def p_package_name_1(self, p): + self.logger.append( + f"Error while parsing PackageName: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("pkg_desc : PKG_DESC text_or_line") + def p_pkg_desc_1(self, p): + self.current_element["description"] = p[2] + + @grammar_rule("pkg_desc : PKG_DESC error") + def p_pkg_desc_2(self, p): + self.logger.append( + f"Error while parsing PackageDescription: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("pkg_comment : PKG_COMMENT text_or_line") + def p_pkg_comment_1(self, p): + self.current_element["comment"] = p[2] + + @grammar_rule("pkg_comment : PKG_COMMENT error") + def p_pkg_comment_2(self, p): + self.logger.append( + f"Error while parsing PackageComment: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") + + @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line") + def p_pkg_attribution_text_1(self, p): + self.current_element.setdefault("attribution_texts", []).append(p[2]) + + @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT error") + def p_pkg_attribution_text_2(self, p): + self.logger.append(f"Error while parsing PackageAttributionText: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_summary : PKG_SUM text_or_line") + def p_pkg_summary_1(self, p): + self.current_element["summary"] = p[2] + + @grammar_rule("pkg_summary : PKG_SUM error") + def p_pkg_summary_2(self, p): + self.logger.append( + f"Error while parsing PackageSummary: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none") + def p_pkg_cr_text_1(self, p): + self.current_element["copyright_text"] = p[2] + + @grammar_rule("pkg_cr_text : PKG_CPY_TEXT error") + def p_pkg_cr_text_2(self, p): + self.logger.append(f"Error while parsing PackageCopyrightText: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_ext_ref : PKG_EXT_REF LINE PKG_EXT_REF_COMMENT text_or_line\n | PKG_EXT_REF LINE") + def p_pkg_ext_refs_1(self, p): + category, reference_type, locator = p[2].split(" ") + comment = None + if len(p) == 5: + comment = p[4] + external_package_ref = ExternalPackageRef(ExternalPackageRefCategory[category], reference_type, locator, + comment) + self.current_element.setdefault("external_references", []).append(external_package_ref) + + @grammar_rule("pkg_ext_ref : PKG_EXT_REF error") + def p_pkg_ext_refs_2(self, p): + self.logger.append( + f"Error while parsing ExternalRef in package: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT text_or_line") + def p_pkg_lic_comment_1(self, p): + self.current_element["license_comment"] = p[2] + + @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT error") + def p_pkg_lic_comment_2(self, p): + self.logger.append(f"Error while parsing PackageLicenseComments: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none") + def p_pkg_lic_decl_1(self, p): + self.current_element["license_declared"] = p[2] + + @grammar_rule("pkg_lic_decl : PKG_LICS_DECL error") + def p_pkg_lic_decl_2(self, p): + self.logger.append( + f"Error while parsing LicenseDeclared in package: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE license_or_no_assertion_or_none") + def p_pkg_lic_ff_1(self, p): + if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): + self.current_element["license_info_from_files"] = p[2] + else: + self.current_element.setdefault("license_info_from_files", []).append(p[2]) + + @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE error") + def p_pkg_lic_ff_error(self, p): + self.logger.append( + f"Error while parsing LicenseInfoFromFiles in package: Token did not match specified grammar rule. 
" + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_lic_conc : PKG_LICS_CONC license_or_no_assertion_or_none") + def p_pkg_lic_conc_1(self, p): + self.current_element["license_concluded"] = p[2] + + @grammar_rule("pkg_lic_conc : PKG_LICS_CONC error") + def p_pkg_lic_conc_2(self, p): + self.logger.append( + f"Error while parsing LicenseConcluded in package: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_src_info : PKG_SRC_INFO text_or_line") + def p_pkg_src_info_1(self, p): + self.current_element["source_info"] = p[2] + + @grammar_rule("pkg_src_info : PKG_SRC_INFO error") + def p_pkg_src_info_2(self, p): + self.logger.append( + f"Error while parsing PackageSourceInfo: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") + def p_pkg_checksum_1(self, p): + split_checksum = p[2].split(":") + algorithm = ChecksumAlgorithm[split_checksum[0]] + value = split_checksum[1].strip() + checksum = Checksum(algorithm, value) + self.current_element.setdefault("checksums", []).append(checksum) + + @grammar_rule("pkg_checksum : PKG_CHECKSUM error") + def p_pkg_checksum_2(self, p): + self.logger.append( + f"Error while parsing PackageChecksum: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("pkg_verif : PKG_VERF_CODE LINE") + def p_pkg_verif_1(self, p): + verif_code_regex = re.compile(r"([0-9a-f]+)\s*(\(excludes:\s*(.+)\))?", re.UNICODE) + verif_code_code_grp = 1 + verif_code_exc_files_grp = 3 + match = verif_code_regex.match(p[2]) + value = match.group(verif_code_code_grp) + excluded_files = None + if match.group(verif_code_exc_files_grp): + excluded_files = match.group(verif_code_exc_files_grp).split(",") + self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files) + + @grammar_rule("pkg_verif : PKG_VERF_CODE error") + def p_pkg_verif_2(self, p): + self.logger.append(f"Error while parsing PackageVerificationCode: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_home : PKG_HOME line_or_no_assertion_or_none") + def p_pkg_home_1(self, p): + self.current_element["homepage"] = p[2] + + @grammar_rule("pkg_home : PKG_HOME error") + def p_pkg_home_2(self, p): + self.logger.append( + f"Error while parsing PackageHomePage: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("pkg_down_location : PKG_DOWN line_or_no_assertion_or_none") + def p_pkg_down_location_1(self, p): + self.current_element["download_location"] = p[2] + + @grammar_rule("pkg_down_location : PKG_DOWN error") + def p_pkg_down_location_2(self, p): + self.logger.append(f"Error while parsing PackageDownloadLocation: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_files_analyzed : PKG_FILES_ANALYZED LINE") + def p_pkg_files_analyzed_1(self, p): + if p[2] in ['false', 'False']: + self.current_element["files_analyzed"] = False + if p[2] in ['true', 'True']: + self.current_element["files_analyzed"] = True + + @grammar_rule("pkg_files_analyzed : PKG_FILES_ANALYZED error") + def p_pkg_files_analyzed_2(self, p): + self.logger.append(f"Error while parsing FilesAnalyzed in package: Token did not match specified grammar rule. 
" + f"Line: {p.lineno(1)}") + + @grammar_rule("pkg_orig : PKG_ORIG pkg_supplier_values") + def p_pkg_orig_1(self, p): + self.current_element["originator"] = p[2] + + @grammar_rule("pkg_orig : PKG_ORIG error") + def p_pkg_orig_2(self, p): + self.logger.append( + f"Error while parsing PackageOriginator: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("pkg_supplier : PKG_SUPPL pkg_supplier_values") + def p_pkg_supplier_1(self, p): + self.current_element["supplier"] = p[2] + + @grammar_rule("pkg_supplier : PKG_SUPPL error") + def p_pkg_supplier_2(self, p): + self.logger.append( + f"Error while parsing PackageSupplier: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("pkg_supplier_values : NO_ASSERTION") + def p_pkg_supplier_values_1(self, p): + p[0] = SpdxNoAssertion() + + @grammar_rule("pkg_supplier_values : PERSON_VALUE\n | ORG_VALUE\n | TOOL_VALUE") + def p_pkg_supplier_values_2(self, p): + p[0] = ActorParser.parse_actor(p[1]) + + @grammar_rule("pkg_file_name : PKG_FILE_NAME LINE") + def p_pkg_file_name(self, p): + self.current_element["file_name"] = p[2] + + @grammar_rule("pkg_file_name : PKG_FILE_NAME error") + def p_pkg_file_name_1(self, p): + self.logger.append( + f"Error while parsing PackageFileName: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("package_version : PKG_VERSION LINE") + def p_package_version_1(self, p): + self.current_element["version"] = p[2] + + @grammar_rule("package_version : PKG_VERSION error") + def p_package_version_2(self, p): + self.logger.append( + f"Error while parsing PackageVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE primary_package_purpose_value") + def p_primary_package_purpose_1(self, p): + + self.current_element["primary_package_purpose"] = PackagePurpose[p[2].replace("-", "_")] + + @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error") + def p_primary_package_purpose_2(self, p): + self.logger.append(f"Error while parsing PrimaryPackagePurpose: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("primary_package_purpose_value : APPLICATION\n | FRAMEWORK\n | LIBRARY\n | CONTAINER\n " + "| OPERATING_SYSTEM \n | DEVICE \n| FIRMWARE\n | SOURCE\n | ARCHIVE\n | FILE\n | INSTALL\n | OTHER") + def p_primary_package_purpose_value(self, p): + p[0] = p[1] + + @grammar_rule("built_date : BUILT_DATE DATE") + def p_built_date_1(self, p): + self.current_element["built_date"] = datetime_from_str(p[2]) + + @grammar_rule("built_date : BUILT_DATE error") + def p_built_date_2(self, p): + self.logger.append( + f"Error while parsing BuiltDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("release_date : RELEASE_DATE DATE") + def p_release_date_1(self, p): + self.current_element["release_date"] = datetime_from_str(p[2]) + + @grammar_rule("release_date : RELEASE_DATE error") + def p_release_date_2(self, p): + self.logger.append( + f"Error while parsing ReleaseDate: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") + + @grammar_rule("valid_until_date : VALID_UNTIL_DATE DATE") + def p_valid_until_date_1(self, p): + self.current_element["valid_until_date"] = datetime_from_str(p[2]) + + @grammar_rule("valid_until_date : VALID_UNTIL_DATE error") + def p_valid_until_date_2(self, p): + self.logger.append( + f"Error while parsing ValidUntilDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + # parsing methods for snippet + @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID LINE") + def p_snip_spdx_id(self, p): + self.construct_current_element() + self.current_element["class"] = Snippet + self.current_element["spdx_id"] = p[2] + + @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID error") + def p_snip_spdx_id_1(self, p): + self.logger.append( + f"Error while parsing SnippetSPDXID: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("snip_name : SNIPPET_NAME LINE") + def p_snippet_name(self, p): + self.current_element["name"] = p[2] + + @grammar_rule("snip_name : SNIPPET_NAME error") + def p_snippet_name_1(self, p): + self.logger.append( + f"Error while parsing SnippetName: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("snip_comment : SNIPPET_COMMENT text_or_line") + def p_snippet_comment(self, p): + self.current_element["comment"] = p[2] + + @grammar_rule("snip_comment : SNIPPET_COMMENT error") + def p_snippet_comment_1(self, p): + self.logger.append( + f"Error while parsing SnippetComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line") + def p_snippet_attribution_text_1(self, p): + self.current_element.setdefault("attribution_texts", []).append(p[2]) + + @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error") + def p_snippet_attribution_text_2(self, p): + self.logger.append(f"Error while parsing SnippetAttributionText: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none") + def p_snippet_cr_text(self, p): + self.current_element["copyright_text"] = p[2] + + @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT error") + def p_snippet_cr_text_1(self, p): + self.logger.append(f"Error while parsing SnippetCopyrightText: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT text_or_line") + def p_snippet_lic_comment(self, p): + self.current_element["license_comment"] = p[2] + + @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT error") + def p_snippet_lic_comment_1(self, p): + self.logger.append(f"Error while parsing SnippetLicenseComments: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID LINE") + def p_snip_from_file_spdxid(self, p): + self.current_element["file_spdx_id"] = p[2] + + @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID error") + def p_snip_from_file_spdxid_1(self, p): + self.logger.append(f"Error while parsing SnippetFromFileSPDXID: Token did not match specified grammar rule. 
" + f"Line: {p.lineno(1)}") + + @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC license_or_no_assertion_or_none") + def p_snippet_concluded_license(self, p): + self.current_element["license_concluded"] = p[2] + + @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC error") + def p_snippet_concluded_license_1(self, p): + self.logger.append(f"Error while parsing SnippetLicenseConcluded: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO license_or_no_assertion_or_none") + def p_snippet_lics_info(self, p): + if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): + self.current_element["license_info_in_snippet"] = p[2] + else: + self.current_element.setdefault("license_info_in_snippet", []).append(p[2]) + + @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO error") + def p_snippet_lics_info_1(self, p): + + self.logger.append(f"Error while parsing LicenseInfoInSnippet: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE") + def p_snippet_byte_range(self, p): + range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) + if not range_re.match(p[2].strip()): + self.current_element["logger"].append("Value for SnippetByteRange doesn't match valid range pattern.") + return + startpoint = int(p[2].split(":")[0]) + endpoint = int(p[2].split(":")[-1]) + self.current_element["byte_range"] = startpoint, endpoint + + @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE error") + def p_snippet_byte_range_1(self, p): + + self.logger.append( + f"Error while parsing SnippetByteRange: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE LINE") + def p_snippet_line_range(self, p): + range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) + if not range_re.match(p[2].strip()): + self.current_element["logger"].append("Value for SnippetLineRange doesn't match valid range pattern.") + return + startpoint = int(p[2].split(":")[0]) + endpoint = int(p[2].split(":")[1]) + self.current_element["line_range"] = startpoint, endpoint + + @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE error") + def p_snippet_line_range_1(self, p): + self.logger.append( + f"Error while parsing SnippetLineRange: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + # parsing methods for annotation + def p_annotator_1(self, p): + """annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE""" + self.construct_current_element() + self.current_element["annotator"] = ActorParser.parse_actor(p[2]) + self.current_element["class"] = Annotation + + @grammar_rule("annotator : ANNOTATOR error") + def p_annotator_2(self, p): + self.logger.append( + f"Error while parsing Annotator: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("annotation_date : ANNOTATION_DATE DATE") + def p_annotation_date_1(self, p): + self.current_element["annotation_date"] = datetime_from_str(p[2]) + + @grammar_rule("annotation_date : ANNOTATION_DATE error") + def p_annotation_date_2(self, p): + self.logger.append( + f"Error while parsing AnnotationDate: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") + + @grammar_rule("annotation_comment : ANNOTATION_COMMENT text_or_line") + def p_annotation_comment_1(self, p): + self.current_element["annotation_comment"] = p[2] + + @grammar_rule("annotation_comment : ANNOTATION_COMMENT error") + def p_annotation_comment_2(self, p): + self.logger.append( + f"Error while parsing AnnotationComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("annotation_type : ANNOTATION_TYPE annotation_type_value") + def p_annotation_type_1(self, p): + self.current_element["annotation_type"] = AnnotationType[p[2]] + + @grammar_rule("annotation_type : ANNOTATION_TYPE error") + def p_annotation_type_2(self, p): + self.logger.append( + f"Error while parsing AnnotationType: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("annotation_type_value : OTHER\n| REVIEW") + def p_annotation_type_value(self, p): + p[0] = p[1] + + @grammar_rule("annotation_spdx_id : ANNOTATION_SPDX_ID LINE") + def p_annotation_spdx_id_1(self, p): + self.current_element["spdx_id"] = p[2] + + @grammar_rule("annotation_spdx_id : ANNOTATION_SPDX_ID error") + def p_annotation_spdx_id_2(self, p): + self.logger.append(f"Error while parsing SPDXREF in annotation: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") + + # parsing methods for relationship + @grammar_rule("relationship : RELATIONSHIP relationship_value") + def p_relationship_1(self, p): + splitted_relationship = p[2].split(" ") + + self.construct_current_element() + self.current_element["class"] = Relationship + self.current_element["relationship_type"] = RelationshipType[splitted_relationship[1]] + self.current_element["related_spdx_element_id"] = splitted_relationship[2] + self.current_element["spdx_element_id"] = splitted_relationship[0] + + @grammar_rule("relationship : RELATIONSHIP error") + def p_relationship_2(self, p): + self.logger.append( + f"Error while parsing Relationship: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("relationship_value : DOC_REF_ID LINE") + def p_relationship_value_with_doc_ref(self, p): + + p[0] = p[1] + ":" + p[2] + + @grammar_rule("relationship_value : LINE") + def p_relationship_value_without_doc_ref(self, p): + + p[0] = p[1] + + @grammar_rule("relationship_comment : RELATIONSHIP_COMMENT text_or_line") + def p_relationship_comment_1(self, p): + self.current_element["comment"] = p[2] + + @grammar_rule("relationship_comment : RELATIONSHIP_COMMENT error") + def p_relationship_comment_2(self, p): + self.logger.append( + f"Error while parsing RelationshipComment: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") + + def p_error(self, p): + pass + + def build(self, **kwargs): + self.lex = SPDXLexer() + self.lex.build(reflags=re.UNICODE) + self.yacc = yacc.yacc(module=self, **kwargs) + + def parse(self, text): + self.yacc.parse(text, lexer=self.lex) + self.construct_current_element() + document = Document(creation_info=CreationInfo(**self.creation_info), **self.elements_build) + print(self.logger.get_messages()) + return document, self.logger.get_messages() + + def construct_current_element(self): + if "class" in self.current_element: + class_name = self.current_element.pop("class") + self.elements_build.setdefault(CLASS_MAPPING[class_name.__name__], []).append( + construct_or_raise_parsing_error(class_name, self.current_element)) + self.current_element = dict() + + +CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", + Package="packages", ExtractedLicensingInfo="extracted_licensing_info") diff --git a/tests/spdx/parser/tagvalue/test_tag_value_parser.py b/tests/spdx/parser/tagvalue/test_tag_value_parser.py new file mode 100644 index 000000000..3692571a2 --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_tag_value_parser.py @@ -0,0 +1,246 @@ +# Copyright (c) 2014 Ahmed H. Ismail +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import sys +from datetime import datetime +from unittest import TestCase + +from license_expression import get_spdx_licensing + +from spdx.model.actor import Actor, ActorType +from spdx.model.annotation import AnnotationType +from spdx.model.checksum import Checksum, ChecksumAlgorithm +from spdx.model.external_document_ref import ExternalDocumentRef +from spdx.model.file import FileType +from spdx.model.package import PackagePurpose, ExternalPackageRefCategory, ExternalPackageRef +from spdx.model.relationship import RelationshipType +from spdx.model.version import Version +from spdx.parser.tagvalue.parser.tagvalue import Parser + +document_str = '\n'.join([ + 'SPDXVersion: SPDX-2.3', + 'DataLicense: CC0-1.0', + 'DocumentName: Sample_Document-V2.3', + 'SPDXID: SPDXRef-DOCUMENT', + 'DocumentComment: Sample Comment', + 'DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301', + 'ExternalDocumentRef: DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759' +]) + +creation_str = '\n'.join([ + 'Creator: Person: Bob (bob@example.com)', + 'Creator: Organization: Acme.', + 'Created: 2010-02-03T00:00:00Z', + 'CreatorComment: Sample Comment', + 'LicenseListVersion: 3.17' +]) + +package_str = '\n'.join([ + 'PackageName: Test', + 'SPDXID: SPDXRef-Package', + 'PackageVersion: Version 0.9.2', + 'PackageDownloadLocation: http://example.com/test', + 'FilesAnalyzed: True', + 'PackageSummary: Test package', + 'PackageSourceInfo: Version 1.0 of test', + 'PackageFileName: test-1.0.zip', + 'PackageSupplier: Organization:ACME', + 'PackageOriginator: Organization:ACME', + 'PackageChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', + 'PackageVerificationCode: 4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)', + 'PackageDescription: A package.', + 'PackageComment: Comment on the package.', + 'PackageCopyrightText: Copyright 2014 Acme Inc.', + 'PackageLicenseDeclared: Apache-2.0', + 'PackageLicenseConcluded: (LicenseRef-2.0 and Apache-2.0)', + 'PackageLicenseInfoFromFiles: Apache-1.0', + 'PackageLicenseInfoFromFiles: Apache-2.0', + 'PackageLicenseComments: License Comments', + 'ExternalRef: SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:', + 'ExternalRefComment: Some comment about the package.', + 'ExternalRef: OTHER LocationRef-acmeforge acmecorp/acmenator/4.1.3-alpha', + 'PrimaryPackagePurpose: OPERATING-SYSTEM', + 'BuiltDate: 2020-01-01T12:00:00Z', + 'ReleaseDate: 2021-01-01T12:00:00Z', + 'ValidUntilDate: 2022-01-01T12:00:00Z' +]) + +file_str = '\n'.join([ + 'FileName: testfile.java', + 'SPDXID: SPDXRef-File', + 'FileType: SOURCE', + 'FileType: TEXT', + 'FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', + 'LicenseConcluded: Apache-2.0', + 'LicenseInfoInFile: Apache-2.0', + 'FileCopyrightText: Copyright 2014 Acme Inc.', + 'FileComment: Very long file', + 'FileAttributionText: Acknowledgements that might be required to be communicated in some contexts.' 
+]) + +snippet_str = '\n'.join([ + 'SnippetSPDXID: SPDXRef-Snippet', + 'SnippetLicenseComments: Some lic comment.', + 'SnippetCopyrightText: Copyright 2008-2010 John Smith ', + 'SnippetComment: Some snippet comment.', + 'SnippetName: from linux kernel', + 'SnippetFromFileSPDXID: SPDXRef-DoapSource', + 'SnippetLicenseConcluded: Apache-2.0', + 'LicenseInfoInSnippet: Apache-2.0', + 'SnippetByteRange: 310:420', + 'SnippetLineRange: 5:23', +]) + +annotation_str = '\n'.join([ + 'Annotator: Person: Jane Doe()', + 'AnnotationDate: 2010-01-29T18:30:22Z', + 'AnnotationComment: Document level annotation', + 'AnnotationType: OTHER', + 'SPDXREF: SPDXRef-DOCUMENT' +]) + +relationship_str = '\n'.join([ + 'Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-File', + 'RelationshipComment: This is a comment.']) + +extracted_licensing_info_str = '\n'.join([ + 'LicenseID: LicenseRef-Beerware-4.2', + 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + 'LicenseName: Beer-Ware License (Version 42)', + 'LicenseCrossReference: http://people.freebsd.org/~phk/', + 'LicenseComment: The beerware license has a couple of other standard variants.' +]) + +unknown_tag_str = 'UnknownTag: This is an example for an unknown tag.' + + +class TestParser(TestCase): + maxDiff = None + complete_str = '{0}\n{1}\n{2}\n{3}\n{4}\n{5}\n{6}\n{7}\n{8}\n'.format(document_str, creation_str, file_str, + annotation_str, + relationship_str, snippet_str, package_str, + extracted_licensing_info_str, unknown_tag_str) + + def setUp(self): + self.p = Parser() + self.p.build() + + def test_creation_info(self): + document, _ = self.p.parse(self.complete_str) + assert document is not None + creation_info = document.creation_info + assert creation_info is not None + assert creation_info.spdx_version == "SPDX-2.3" + assert creation_info.data_license == 'CC0-1.0' + assert creation_info.name == 'Sample_Document-V2.3' + assert creation_info.spdx_id == 'SPDXRef-DOCUMENT' + assert creation_info.document_comment == 'Sample Comment' + assert creation_info.document_namespace == 'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301' + TestCase().assertCountEqual(creation_info.creators, + [Actor(ActorType.PERSON, "Bob", "bob@example.com"), + Actor(ActorType.ORGANIZATION, "Acme.")]) + assert creation_info.creator_comment == 'Sample Comment' + assert creation_info.created == datetime(2010, 2, 3, 0, 0) + assert creation_info.license_list_version == Version(3, 17) + self.assertCountEqual(creation_info.external_document_refs, [ExternalDocumentRef("DocumentRef-spdx-tool-1.2", + "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", + Checksum(ChecksumAlgorithm.SHA1, + "d6a770ba38583ed4bb4525bd96e50461655d2759"))]) + + def test_extracted_licensing_info(self): + document, _ = self.p.parse(self.complete_str) + assert document is not None + assert len(document.extracted_licensing_info) == 1 + extracted_licensing_info = document.extracted_licensing_info[0] + assert extracted_licensing_info.license_id == "LicenseRef-Beerware-4.2" + assert extracted_licensing_info.extracted_text == '"THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. 
If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + assert extracted_licensing_info.license_name == "Beer-Ware License (Version 42)" + assert extracted_licensing_info.cross_references == ["http://people.freebsd.org/~phk/"] + assert extracted_licensing_info.comment == "The beerware license has a couple of other standard variants." + + def test_unknown_tag(self): + document, messages = self.p.parse(self.complete_str) + assert len(messages) == 1 + + def test_package(self): + document, _ = self.p.parse(self.complete_str) + assert document is not None + package = document.packages[0] + assert package.name == 'Test' + assert package.spdx_id == 'SPDXRef-Package' + assert package.version == 'Version 0.9.2' + assert len(package.license_info_from_files) == 2 + assert package.license_concluded == get_spdx_licensing().parse('LicenseRef-2.0 AND Apache-2.0') + assert package.files_analyzed is True + assert package.comment == 'Comment on the package.' + assert len(package.external_references) == 2 + self.assertCountEqual(package.external_references, + [ExternalPackageRef(ExternalPackageRefCategory.SECURITY, "cpe23Type", + "cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:", + "Some comment about the package."), + ExternalPackageRef(ExternalPackageRefCategory.OTHER, "LocationRef-acmeforge", + "acmecorp/acmenator/4.1.3-alpha")]) + assert package.primary_package_purpose == PackagePurpose.OPERATING_SYSTEM + assert package.built_date == datetime(2020, 1, 1, 12, 0, 0) + assert package.release_date == datetime(2021, 1, 1, 12, 0, 0) + assert package.valid_until_date == datetime(2022, 1, 1, 12, 0, 0) + + def test_file(self): + document, _ = self.p.parse(self.complete_str) + assert document is not None + assert len(document.files) == 1 + spdx_file = document.files[0] + assert spdx_file.name == 'testfile.java' + assert spdx_file.spdx_id == 'SPDXRef-File' + assert spdx_file.file_type == [FileType.SOURCE, FileType.TEXT] + assert spdx_file.comment == 'Very long file' + assert spdx_file.attribution_texts == ['Acknowledgements that might be required to be communicated in ' \ + 'some contexts.'] + assert spdx_file.license_info_in_file == [get_spdx_licensing().parse("Apache-2.0")] + assert spdx_file.license_concluded == get_spdx_licensing().parse("Apache-2.0") + + def test_annotation(self): + document, _ = self.p.parse(self.complete_str) + assert document is not None + assert len(document.annotations) == 1 + annotation = document.annotations[0] + assert annotation.annotator.name == 'Jane Doe' + assert annotation.annotation_date == datetime(2010, 1, 29, 18, 30, 22) + assert annotation.annotation_comment == 'Document level annotation' + assert annotation.annotation_type == AnnotationType.OTHER + assert annotation.spdx_id == 'SPDXRef-DOCUMENT' + + def test_relationship(self): + document, _ = self.p.parse(self.complete_str) + assert document is not None + relationship = document.relationships[0] + assert relationship.relationship_type == RelationshipType.DESCRIBES + assert relationship.related_spdx_element_id == "SPDXRef-File" + assert relationship.spdx_element_id == "SPDXRef-DOCUMENT" + assert relationship.comment == "This is a comment." + + def test_snippet(self): + document, _ = self.p.parse(self.complete_str) + assert document is not None + assert len(document.snippets) == 1 + snippet = document.snippets[0] + assert snippet.spdx_id == 'SPDXRef-Snippet' + assert snippet.name == 'from linux kernel' + assert snippet.comment == 'Some snippet comment.' 
+ assert snippet.copyright_text == ' Copyright 2008-2010 John Smith ' + assert snippet.license_comment == 'Some lic comment.' + assert snippet.file_spdx_id == 'SPDXRef-DoapSource' + assert snippet.license_concluded == get_spdx_licensing().parse('Apache-2.0') + assert snippet.license_info_in_snippet == [get_spdx_licensing().parse('Apache-2.0')] + assert snippet.byte_range[0] == 310 + assert snippet.byte_range[1] == 420 + assert snippet.line_range[0] == 5 + assert snippet.line_range[1] == 23 From 8a6c45bc3fbcfb8348f8b05a31327956de0e84b5 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Mon, 27 Feb 2023 14:12:01 +0100 Subject: [PATCH 03/43] [refactor] use pytest fixture instead of class Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 11 +- .../parser/tagvalue/test_tag_value_parser.py | 255 +++++++++--------- 2 files changed, 140 insertions(+), 126 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 7909f35d9..8fcdff335 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -26,13 +26,13 @@ from spdx.model.relationship import Relationship, RelationshipType from spdx.model.snippet import Snippet from spdx.model.version import Version -from spdx.parser.jsonlikedict.actor_parser import ActorParser +from spdx.parser.actor_parser import ActorParser from spdx.model.document import Document, CreationInfo from spdx.model.file import File, FileType from spdx.model.spdx_no_assertion import SpdxNoAssertion from spdx.model.spdx_none import SpdxNone -from spdx.parser.jsonlikedict.dict_parsing_functions import construct_or_raise_parsing_error +from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.logger import Logger from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text @@ -891,9 +891,12 @@ def build(self, **kwargs): def parse(self, text): self.yacc.parse(text, lexer=self.lex) self.construct_current_element() - document = Document(creation_info=CreationInfo(**self.creation_info), **self.elements_build) + raise_parsing_error_if_logger_has_messages(self.logger) + creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info) + self.elements_build["creation_info"] = creation_info + document = construct_or_raise_parsing_error(Document, self.elements_build) print(self.logger.get_messages()) - return document, self.logger.get_messages() + return document def construct_current_element(self): if "class" in self.current_element: diff --git a/tests/spdx/parser/tagvalue/test_tag_value_parser.py b/tests/spdx/parser/tagvalue/test_tag_value_parser.py index 3692571a2..3893b829c 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_parser.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_parser.py @@ -13,6 +13,7 @@ from datetime import datetime from unittest import TestCase +import pytest from license_expression import get_spdx_licensing from spdx.model.actor import Actor, ActorType @@ -23,6 +24,7 @@ from spdx.model.package import PackagePurpose, ExternalPackageRefCategory, ExternalPackageRef from spdx.model.relationship import RelationshipType from spdx.model.version import Version +from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser document_str = '\n'.join([ @@ -121,126 +123,135 @@ unknown_tag_str = 'UnknownTag: This is an 
example for an unknown tag.' +complete_str = '{0}\n{1}\n{2}\n{3}\n{4}\n{5}\n{6}\n{7}\n'.format(document_str, creation_str, file_str, + annotation_str, + relationship_str, snippet_str, package_str, + extracted_licensing_info_str) -class TestParser(TestCase): - maxDiff = None - complete_str = '{0}\n{1}\n{2}\n{3}\n{4}\n{5}\n{6}\n{7}\n{8}\n'.format(document_str, creation_str, file_str, - annotation_str, - relationship_str, snippet_str, package_str, - extracted_licensing_info_str, unknown_tag_str) - - def setUp(self): - self.p = Parser() - self.p.build() - - def test_creation_info(self): - document, _ = self.p.parse(self.complete_str) - assert document is not None - creation_info = document.creation_info - assert creation_info is not None - assert creation_info.spdx_version == "SPDX-2.3" - assert creation_info.data_license == 'CC0-1.0' - assert creation_info.name == 'Sample_Document-V2.3' - assert creation_info.spdx_id == 'SPDXRef-DOCUMENT' - assert creation_info.document_comment == 'Sample Comment' - assert creation_info.document_namespace == 'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301' - TestCase().assertCountEqual(creation_info.creators, - [Actor(ActorType.PERSON, "Bob", "bob@example.com"), - Actor(ActorType.ORGANIZATION, "Acme.")]) - assert creation_info.creator_comment == 'Sample Comment' - assert creation_info.created == datetime(2010, 2, 3, 0, 0) - assert creation_info.license_list_version == Version(3, 17) - self.assertCountEqual(creation_info.external_document_refs, [ExternalDocumentRef("DocumentRef-spdx-tool-1.2", - "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", - Checksum(ChecksumAlgorithm.SHA1, - "d6a770ba38583ed4bb4525bd96e50461655d2759"))]) - - def test_extracted_licensing_info(self): - document, _ = self.p.parse(self.complete_str) - assert document is not None - assert len(document.extracted_licensing_info) == 1 - extracted_licensing_info = document.extracted_licensing_info[0] - assert extracted_licensing_info.license_id == "LicenseRef-Beerware-4.2" - assert extracted_licensing_info.extracted_text == '"THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' - assert extracted_licensing_info.license_name == "Beer-Ware License (Version 42)" - assert extracted_licensing_info.cross_references == ["http://people.freebsd.org/~phk/"] - assert extracted_licensing_info.comment == "The beerware license has a couple of other standard variants." - - def test_unknown_tag(self): - document, messages = self.p.parse(self.complete_str) - assert len(messages) == 1 - - def test_package(self): - document, _ = self.p.parse(self.complete_str) - assert document is not None - package = document.packages[0] - assert package.name == 'Test' - assert package.spdx_id == 'SPDXRef-Package' - assert package.version == 'Version 0.9.2' - assert len(package.license_info_from_files) == 2 - assert package.license_concluded == get_spdx_licensing().parse('LicenseRef-2.0 AND Apache-2.0') - assert package.files_analyzed is True - assert package.comment == 'Comment on the package.' 
- assert len(package.external_references) == 2 - self.assertCountEqual(package.external_references, - [ExternalPackageRef(ExternalPackageRefCategory.SECURITY, "cpe23Type", - "cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:", - "Some comment about the package."), - ExternalPackageRef(ExternalPackageRefCategory.OTHER, "LocationRef-acmeforge", - "acmecorp/acmenator/4.1.3-alpha")]) - assert package.primary_package_purpose == PackagePurpose.OPERATING_SYSTEM - assert package.built_date == datetime(2020, 1, 1, 12, 0, 0) - assert package.release_date == datetime(2021, 1, 1, 12, 0, 0) - assert package.valid_until_date == datetime(2022, 1, 1, 12, 0, 0) - - def test_file(self): - document, _ = self.p.parse(self.complete_str) - assert document is not None - assert len(document.files) == 1 - spdx_file = document.files[0] - assert spdx_file.name == 'testfile.java' - assert spdx_file.spdx_id == 'SPDXRef-File' - assert spdx_file.file_type == [FileType.SOURCE, FileType.TEXT] - assert spdx_file.comment == 'Very long file' - assert spdx_file.attribution_texts == ['Acknowledgements that might be required to be communicated in ' \ - 'some contexts.'] - assert spdx_file.license_info_in_file == [get_spdx_licensing().parse("Apache-2.0")] - assert spdx_file.license_concluded == get_spdx_licensing().parse("Apache-2.0") - - def test_annotation(self): - document, _ = self.p.parse(self.complete_str) - assert document is not None - assert len(document.annotations) == 1 - annotation = document.annotations[0] - assert annotation.annotator.name == 'Jane Doe' - assert annotation.annotation_date == datetime(2010, 1, 29, 18, 30, 22) - assert annotation.annotation_comment == 'Document level annotation' - assert annotation.annotation_type == AnnotationType.OTHER - assert annotation.spdx_id == 'SPDXRef-DOCUMENT' - - def test_relationship(self): - document, _ = self.p.parse(self.complete_str) - assert document is not None - relationship = document.relationships[0] - assert relationship.relationship_type == RelationshipType.DESCRIBES - assert relationship.related_spdx_element_id == "SPDXRef-File" - assert relationship.spdx_element_id == "SPDXRef-DOCUMENT" - assert relationship.comment == "This is a comment." - - def test_snippet(self): - document, _ = self.p.parse(self.complete_str) - assert document is not None - assert len(document.snippets) == 1 - snippet = document.snippets[0] - assert snippet.spdx_id == 'SPDXRef-Snippet' - assert snippet.name == 'from linux kernel' - assert snippet.comment == 'Some snippet comment.' - assert snippet.copyright_text == ' Copyright 2008-2010 John Smith ' - assert snippet.license_comment == 'Some lic comment.' 
- assert snippet.file_spdx_id == 'SPDXRef-DoapSource' - assert snippet.license_concluded == get_spdx_licensing().parse('Apache-2.0') - assert snippet.license_info_in_snippet == [get_spdx_licensing().parse('Apache-2.0')] - assert snippet.byte_range[0] == 310 - assert snippet.byte_range[1] == 420 - assert snippet.line_range[0] == 5 - assert snippet.line_range[1] == 23 + +@pytest.fixture +def parser(): + spdx_parser = Parser() + spdx_parser.build() + return spdx_parser + + +def test_creation_info(parser): + document = parser.parse(complete_str) + assert document is not None + creation_info = document.creation_info + assert creation_info is not None + assert creation_info.spdx_version == "SPDX-2.3" + assert creation_info.data_license == 'CC0-1.0' + assert creation_info.name == 'Sample_Document-V2.3' + assert creation_info.spdx_id == 'SPDXRef-DOCUMENT' + assert creation_info.document_comment == 'Sample Comment' + assert creation_info.document_namespace == 'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301' + TestCase().assertCountEqual(creation_info.creators, + [Actor(ActorType.PERSON, "Bob", "bob@example.com"), + Actor(ActorType.ORGANIZATION, "Acme.")]) + assert creation_info.creator_comment == 'Sample Comment' + assert creation_info.created == datetime(2010, 2, 3, 0, 0) + assert creation_info.license_list_version == Version(3, 17) + TestCase().assertCountEqual(creation_info.external_document_refs, + [ExternalDocumentRef("DocumentRef-spdx-tool-1.2", + "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", + Checksum(ChecksumAlgorithm.SHA1, + "d6a770ba38583ed4bb4525bd96e50461655d2759"))]) + + +def test_extracted_licensing_info(parser): + document = parser.parse(complete_str) + assert document is not None + assert len(document.extracted_licensing_info) == 1 + extracted_licensing_info = document.extracted_licensing_info[0] + assert extracted_licensing_info.license_id == "LicenseRef-Beerware-4.2" + assert extracted_licensing_info.extracted_text == '"THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + assert extracted_licensing_info.license_name == "Beer-Ware License (Version 42)" + assert extracted_licensing_info.cross_references == ["http://people.freebsd.org/~phk/"] + assert extracted_licensing_info.comment == "The beerware license has a couple of other standard variants." + + +def test_package(parser): + document = parser.parse(complete_str) + assert document is not None + package = document.packages[0] + assert package.name == 'Test' + assert package.spdx_id == 'SPDXRef-Package' + assert package.version == 'Version 0.9.2' + assert len(package.license_info_from_files) == 2 + assert package.license_concluded == get_spdx_licensing().parse('LicenseRef-2.0 AND Apache-2.0') + assert package.files_analyzed is True + assert package.comment == 'Comment on the package.' 
+ assert len(package.external_references) == 2 + TestCase().assertCountEqual(package.external_references, + [ExternalPackageRef(ExternalPackageRefCategory.SECURITY, "cpe23Type", + "cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:", + "Some comment about the package."), + ExternalPackageRef(ExternalPackageRefCategory.OTHER, "LocationRef-acmeforge", + "acmecorp/acmenator/4.1.3-alpha")]) + assert package.primary_package_purpose == PackagePurpose.OPERATING_SYSTEM + assert package.built_date == datetime(2020, 1, 1, 12, 0, 0) + assert package.release_date == datetime(2021, 1, 1, 12, 0, 0) + assert package.valid_until_date == datetime(2022, 1, 1, 12, 0, 0) + + +def test_file(parser): + document = parser.parse(complete_str) + assert document is not None + assert len(document.files) == 1 + spdx_file = document.files[0] + assert spdx_file.name == 'testfile.java' + assert spdx_file.spdx_id == 'SPDXRef-File' + assert spdx_file.file_type == [FileType.SOURCE, FileType.TEXT] + assert spdx_file.comment == 'Very long file' + assert spdx_file.attribution_texts == ['Acknowledgements that might be required to be communicated in ' \ + 'some contexts.'] + assert spdx_file.license_info_in_file == [get_spdx_licensing().parse("Apache-2.0")] + assert spdx_file.license_concluded == get_spdx_licensing().parse("Apache-2.0") + + +def test_annotation(parser): + document = parser.parse(complete_str) + assert document is not None + assert len(document.annotations) == 1 + annotation = document.annotations[0] + assert annotation.annotator.name == 'Jane Doe' + assert annotation.annotation_date == datetime(2010, 1, 29, 18, 30, 22) + assert annotation.annotation_comment == 'Document level annotation' + assert annotation.annotation_type == AnnotationType.OTHER + assert annotation.spdx_id == 'SPDXRef-DOCUMENT' + + +def test_relationship(parser): + document = parser.parse(complete_str) + assert document is not None + relationship = document.relationships[0] + assert relationship.relationship_type == RelationshipType.DESCRIBES + assert relationship.related_spdx_element_id == "SPDXRef-File" + assert relationship.spdx_element_id == "SPDXRef-DOCUMENT" + assert relationship.comment == "This is a comment." + + +def test_snippet(parser): + document = parser.parse(complete_str) + assert document is not None + assert len(document.snippets) == 1 + snippet = document.snippets[0] + assert snippet.spdx_id == 'SPDXRef-Snippet' + assert snippet.name == 'from linux kernel' + assert snippet.comment == 'Some snippet comment.' + assert snippet.copyright_text == ' Copyright 2008-2010 John Smith ' + assert snippet.license_comment == 'Some lic comment.' 
+ assert snippet.file_spdx_id == 'SPDXRef-DoapSource' + assert snippet.license_concluded == get_spdx_licensing().parse('Apache-2.0') + assert snippet.license_info_in_snippet == [get_spdx_licensing().parse('Apache-2.0')] + assert snippet.byte_range[0] == 310 + assert snippet.byte_range[1] == 420 + assert snippet.line_range[0] == 5 + assert snippet.line_range[1] == 23 + + +def test_unknown_str(parser): + with pytest.raises(SPDXParsingError, match="Unknown tag"): + parser.parse(unknown_tag_str) From 1efdade263e96ba5f9e29741be3bcee034e917f8 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 08:52:55 +0100 Subject: [PATCH 04/43] [issue-382] implement error handling Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 36 +++++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 8fcdff335..a0198f149 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -11,7 +11,6 @@ # limitations under the License. import re -from typing import Optional from license_expression import get_spdx_licensing from ply import yacc @@ -32,6 +31,7 @@ from spdx.model.file import File, FileType from spdx.model.spdx_no_assertion import SpdxNoAssertion from spdx.model.spdx_none import SpdxNone +from spdx.parser.error import SPDXParsingError from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.logger import Logger from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer @@ -250,6 +250,7 @@ def p_extr_lic_id_2(self, p): @grammar_rule("lic_xref : LICS_CRS_REF LINE") def p_lic_xref_1(self, p): + self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) self.current_element.setdefault("cross_references", []).append(p[2]) @grammar_rule("lic_xref : LICS_CRS_REF error") @@ -848,13 +849,20 @@ def p_annotation_spdx_id_2(self, p): # parsing methods for relationship @grammar_rule("relationship : RELATIONSHIP relationship_value") def p_relationship_1(self, p): - splitted_relationship = p[2].split(" ") - + try: + spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ") + except ValueError: + self.logger.append(f"Relationship couldn't be split in spdx_element_id, relationship_type and " + f"related_spdx_element. Line: {p.lineno(1)}") + return self.construct_current_element() self.current_element["class"] = Relationship - self.current_element["relationship_type"] = RelationshipType[splitted_relationship[1]] - self.current_element["related_spdx_element_id"] = splitted_relationship[2] - self.current_element["spdx_element_id"] = splitted_relationship[0] + try: + self.current_element["relationship_type"] = RelationshipType[relationship_type] + except KeyError: + self.logger.append(f"Invalid RelationshipType {relationship_type}. 
Line: {p.lineno(1)}") + self.current_element["related_spdx_element_id"] = related_spdx_element_id + self.current_element["spdx_element_id"] = spdx_element_id @grammar_rule("relationship : RELATIONSHIP error") def p_relationship_2(self, p): @@ -895,15 +903,23 @@ def parse(self, text): creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info) self.elements_build["creation_info"] = creation_info document = construct_or_raise_parsing_error(Document, self.elements_build) - print(self.logger.get_messages()) return document def construct_current_element(self): - if "class" in self.current_element: - class_name = self.current_element.pop("class") + if "class" not in self.current_element: + return + class_name = self.current_element.pop("class") + try: self.elements_build.setdefault(CLASS_MAPPING[class_name.__name__], []).append( construct_or_raise_parsing_error(class_name, self.current_element)) - self.current_element = dict() + except SPDXParsingError as err: + self.logger.append(err.get_messages()) + self.current_element = dict() + + def check_that_current_element_matches_class_for_value(self, expected_class): + if expected_class != self.current_element["class"]: + raise SPDXParsingError(["Unexpected current element for value"]) + # what to do now? exit parsing CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", From 71689b980e920daedc1cce1412425ccfa6324685 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 08:57:23 +0100 Subject: [PATCH 05/43] [issue-382] catch exception if not all required arguments are provided for construction Signed-off-by: Meret Behrens --- src/spdx/parser/parsing_functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/spdx/parser/parsing_functions.py b/src/spdx/parser/parsing_functions.py index 5beaef5f1..f8cd3ce25 100644 --- a/src/spdx/parser/parsing_functions.py +++ b/src/spdx/parser/parsing_functions.py @@ -20,6 +20,8 @@ def construct_or_raise_parsing_error(object_to_construct: Any, args_for_construc constructed_object = object_to_construct(**args_for_construction) except ConstructorTypeErrors as err: raise SPDXParsingError([f"Error while constructing {object_to_construct.__name__}: {err.get_messages()}"]) + except TypeError as err: + raise SPDXParsingError([f"Error while constructing {object_to_construct.__name__}: {err.args[0]}"]) return constructed_object From 77de34b0b142dc3125e8d026262d09a771145497 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 14:29:20 +0100 Subject: [PATCH 06/43] [refactor] tests Signed-off-by: Meret Behrens --- .../parser/tagvalue/test_annotation_parser.py | 44 ++++ .../tagvalue/test_creation_info_parser.py | 66 +++++ .../test_extracted_licensing_info_parser.py | 40 ++++ .../spdx/parser/tagvalue/test_file_parser.py | 51 ++++ .../parser/tagvalue/test_package_parser.py | 79 ++++++ .../tagvalue/test_relationship_parser.py | 45 ++++ .../parser/tagvalue/test_snippet_parser.py | 54 +++++ .../parser/tagvalue/test_tag_value_parser.py | 225 +----------------- 8 files changed, 381 insertions(+), 223 deletions(-) create mode 100644 tests/spdx/parser/tagvalue/test_annotation_parser.py create mode 100644 tests/spdx/parser/tagvalue/test_creation_info_parser.py create mode 100644 tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py create mode 100644 tests/spdx/parser/tagvalue/test_file_parser.py create mode 100644 tests/spdx/parser/tagvalue/test_package_parser.py create mode 100644 
tests/spdx/parser/tagvalue/test_relationship_parser.py create mode 100644 tests/spdx/parser/tagvalue/test_snippet_parser.py diff --git a/tests/spdx/parser/tagvalue/test_annotation_parser.py b/tests/spdx/parser/tagvalue/test_annotation_parser.py new file mode 100644 index 000000000..8fbcc2f9b --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_annotation_parser.py @@ -0,0 +1,44 @@ +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from datetime import datetime + +import pytest + +from spdx.parser.tagvalue.parser.tagvalue import Parser +from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR + +from spdx.model.annotation import AnnotationType + + +@pytest.fixture +def parser(): + spdx_parser = Parser() + spdx_parser.build() + return spdx_parser + + +def test_annotation(parser): + annotation_str = '\n'.join([ + 'Annotator: Person: Jane Doe()', + 'AnnotationDate: 2010-01-29T18:30:22Z', + 'AnnotationComment: Document level annotation', + 'AnnotationType: OTHER', + 'SPDXREF: SPDXRef-DOCUMENT' + ]) + document = parser.parse("\n".join([DOCUMENT_STR, annotation_str])) + assert document is not None + assert len(document.annotations) == 1 + annotation = document.annotations[0] + assert annotation.annotator.name == 'Jane Doe' + assert annotation.annotation_date == datetime(2010, 1, 29, 18, 30, 22) + assert annotation.annotation_comment == 'Document level annotation' + assert annotation.annotation_type == AnnotationType.OTHER + assert annotation.spdx_id == 'SPDXRef-DOCUMENT' diff --git a/tests/spdx/parser/tagvalue/test_creation_info_parser.py b/tests/spdx/parser/tagvalue/test_creation_info_parser.py new file mode 100644 index 000000000..3aca7aca9 --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_creation_info_parser.py @@ -0,0 +1,66 @@ +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
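
The element-level tests (the annotation module above, and the file, package, relationship and snippet modules that follow) prepend DOCUMENT_STR to the tags under test. That prefix is load-bearing: parse() always constructs a CreationInfo from the collected document tags, so an element block on its own cannot produce a Document. A sketch of the failure mode under the error handling introduced earlier in the series (the test name is illustrative and not part of these patches):

    import pytest

    from spdx.parser.error import SPDXParsingError
    from spdx.parser.tagvalue.parser.tagvalue import Parser

    def test_annotation_without_document_header_fails():
        parser = Parser()
        parser.build()
        # Without SPDXVersion, DocumentName, SPDXID etc. the mandatory
        # CreationInfo arguments are missing and parsing fails as a whole.
        with pytest.raises(SPDXParsingError):
            parser.parse("Annotator: Person: Jane Doe()")
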
+from datetime import datetime +from unittest import TestCase + +import pytest + +from spdx.model.actor import Actor, ActorType +from spdx.model.checksum import Checksum, ChecksumAlgorithm +from spdx.model.external_document_ref import ExternalDocumentRef +from spdx.model.version import Version +from spdx.parser.tagvalue.parser.tagvalue import Parser + +DOCUMENT_STR = '\n'.join([ + 'SPDXVersion: SPDX-2.3', + 'DataLicense: CC0-1.0', + 'DocumentName: Sample_Document-V2.3', + 'SPDXID: SPDXRef-DOCUMENT', + 'DocumentComment: Sample Comment', + 'DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301', + 'ExternalDocumentRef: DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759', + 'Creator: Person: Bob (bob@example.com)', + 'Creator: Organization: Acme.', + 'Created: 2010-02-03T00:00:00Z', + 'CreatorComment: Sample Comment', + 'LicenseListVersion: 3.17' +]) + + +@pytest.fixture +def parser(): + spdx_parser = Parser() + spdx_parser.build() + return spdx_parser + + +def test_creation_info(parser): + document = parser.parse(DOCUMENT_STR) + assert document is not None + creation_info = document.creation_info + assert creation_info is not None + assert creation_info.spdx_version == "SPDX-2.3" + assert creation_info.data_license == 'CC0-1.0' + assert creation_info.name == 'Sample_Document-V2.3' + assert creation_info.spdx_id == 'SPDXRef-DOCUMENT' + assert creation_info.document_comment == 'Sample Comment' + assert creation_info.document_namespace == 'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301' + TestCase().assertCountEqual(creation_info.creators, + [Actor(ActorType.PERSON, "Bob", "bob@example.com"), + Actor(ActorType.ORGANIZATION, "Acme.")]) + assert creation_info.creator_comment == 'Sample Comment' + assert creation_info.created == datetime(2010, 2, 3, 0, 0) + assert creation_info.license_list_version == Version(3, 17) + TestCase().assertCountEqual(creation_info.external_document_refs, + [ExternalDocumentRef("DocumentRef-spdx-tool-1.2", + "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", + Checksum(ChecksumAlgorithm.SHA1, + "d6a770ba38583ed4bb4525bd96e50461655d2759"))]) diff --git a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py new file mode 100644 index 000000000..7197b0676 --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py @@ -0,0 +1,40 @@ +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
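
The three-line parser fixture is duplicated verbatim in each of the new test modules above and below. pytest would also pick it up from a package-level conftest.py, which would leave every module with only its test functions. A possible consolidation, not part of this patch series:

    # tests/spdx/parser/tagvalue/conftest.py (hypothetical)
    import pytest

    from spdx.parser.tagvalue.parser.tagvalue import Parser


    @pytest.fixture
    def parser():
        spdx_parser = Parser()
        spdx_parser.build()
        return spdx_parser
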
+import pytest + +from spdx.parser.tagvalue.parser.tagvalue import Parser +from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR + + +@pytest.fixture +def parser(): + spdx_parser = Parser() + spdx_parser.build() + return spdx_parser + + +def test_extracted_licensing_info(parser): + extracted_licensing_info_str = '\n'.join([ + 'LicenseID: LicenseRef-Beerware-4.2', + 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + 'LicenseName: Beer-Ware License (Version 42)', + 'LicenseCrossReference: http://people.freebsd.org/~phk/', + 'LicenseComment: The beerware license has a couple of other standard variants.' + ]) + document = parser.parse("\n".join([DOCUMENT_STR, extracted_licensing_info_str])) + assert document is not None + assert len(document.extracted_licensing_info) == 1 + extracted_licensing_info = document.extracted_licensing_info[0] + assert extracted_licensing_info.license_id == "LicenseRef-Beerware-4.2" + assert extracted_licensing_info.extracted_text == '"THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + assert extracted_licensing_info.license_name == "Beer-Ware License (Version 42)" + assert extracted_licensing_info.cross_references == ["http://people.freebsd.org/~phk/"] + assert extracted_licensing_info.comment == "The beerware license has a couple of other standard variants." diff --git a/tests/spdx/parser/tagvalue/test_file_parser.py b/tests/spdx/parser/tagvalue/test_file_parser.py new file mode 100644 index 000000000..dab3eeedc --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_file_parser.py @@ -0,0 +1,51 @@ +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +from license_expression import get_spdx_licensing + +from spdx.parser.tagvalue.parser.tagvalue import Parser +from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR + +from spdx.model.file import FileType + + +@pytest.fixture +def parser(): + spdx_parser = Parser() + spdx_parser.build() + return spdx_parser + + +def test_file(parser): + file_str = '\n'.join([ + 'FileName: testfile.java', + 'SPDXID: SPDXRef-File', + 'FileType: SOURCE', + 'FileType: TEXT', + 'FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', + 'LicenseConcluded: Apache-2.0', + 'LicenseInfoInFile: Apache-2.0', + 'FileCopyrightText: Copyright 2014 Acme Inc.', + 'FileComment: Very long file', + 'FileAttributionText: Acknowledgements that might be required to be communicated in some contexts.' 
+ ]) + document = parser.parse("\n".join([DOCUMENT_STR, file_str])) + assert document is not None + assert len(document.files) == 1 + spdx_file = document.files[0] + assert spdx_file.name == 'testfile.java' + assert spdx_file.spdx_id == 'SPDXRef-File' + assert spdx_file.file_type == [FileType.SOURCE, FileType.TEXT] + assert spdx_file.comment == 'Very long file' + assert spdx_file.attribution_texts == [ + 'Acknowledgements that might be required to be communicated in some contexts.'] + assert spdx_file.license_info_in_file == [get_spdx_licensing().parse("Apache-2.0")] + assert spdx_file.license_concluded == get_spdx_licensing().parse("Apache-2.0") diff --git a/tests/spdx/parser/tagvalue/test_package_parser.py b/tests/spdx/parser/tagvalue/test_package_parser.py new file mode 100644 index 000000000..4de7ffd41 --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_package_parser.py @@ -0,0 +1,79 @@ +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from datetime import datetime +from unittest import TestCase + +import pytest +from license_expression import get_spdx_licensing + +from spdx.model.package import ExternalPackageRef, ExternalPackageRefCategory, PackagePurpose +from spdx.parser.tagvalue.parser.tagvalue import Parser +from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR + + +@pytest.fixture +def parser(): + spdx_parser = Parser() + spdx_parser.build() + return spdx_parser + + +def test_package(parser): + package_str = '\n'.join([ + 'PackageName: Test', + 'SPDXID: SPDXRef-Package', + 'PackageVersion: Version 0.9.2', + 'PackageDownloadLocation: http://example.com/test', + 'FilesAnalyzed: True', + 'PackageSummary: Test package', + 'PackageSourceInfo: Version 1.0 of test', + 'PackageFileName: test-1.0.zip', + 'PackageSupplier: Organization:ACME', + 'PackageOriginator: Organization:ACME', + 'PackageChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', + 'PackageVerificationCode: 4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)', + 'PackageDescription: A package.', + 'PackageComment: Comment on the package.', + 'PackageCopyrightText: Copyright 2014 Acme Inc.', + 'PackageLicenseDeclared: Apache-2.0', + 'PackageLicenseConcluded: (LicenseRef-2.0 and Apache-2.0)', + 'PackageLicenseInfoFromFiles: Apache-1.0', + 'PackageLicenseInfoFromFiles: Apache-2.0', + 'PackageLicenseComments: License Comments', + 'ExternalRef: SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:', + 'ExternalRefComment: Some comment about the package.', + 'ExternalRef: OTHER LocationRef-acmeforge acmecorp/acmenator/4.1.3-alpha', + 'PrimaryPackagePurpose: OPERATING-SYSTEM', + 'BuiltDate: 2020-01-01T12:00:00Z', + 'ReleaseDate: 2021-01-01T12:00:00Z', + 'ValidUntilDate: 2022-01-01T12:00:00Z' + ]) + document = parser.parse("\n".join([DOCUMENT_STR, package_str])) + assert document is not None + package = document.packages[0] + assert package.name == 'Test' + assert package.spdx_id == 'SPDXRef-Package' + assert package.version == 
'Version 0.9.2' + assert len(package.license_info_from_files) == 2 + assert package.license_concluded == get_spdx_licensing().parse('LicenseRef-2.0 AND Apache-2.0') + assert package.files_analyzed is True + assert package.comment == 'Comment on the package.' + assert len(package.external_references) == 2 + TestCase().assertCountEqual(package.external_references, + [ExternalPackageRef(ExternalPackageRefCategory.SECURITY, "cpe23Type", + "cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:", + "Some comment about the package."), + ExternalPackageRef(ExternalPackageRefCategory.OTHER, "LocationRef-acmeforge", + "acmecorp/acmenator/4.1.3-alpha")]) + assert package.primary_package_purpose == PackagePurpose.OPERATING_SYSTEM + assert package.built_date == datetime(2020, 1, 1, 12, 0, 0) + assert package.release_date == datetime(2021, 1, 1, 12, 0, 0) + assert package.valid_until_date == datetime(2022, 1, 1, 12, 0, 0) diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py new file mode 100644 index 000000000..ba18ea88c --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -0,0 +1,45 @@ +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest + +from spdx.model.relationship import RelationshipType +from spdx.parser.error import SPDXParsingError +from spdx.parser.tagvalue.parser.tagvalue import Parser +from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR + + +@pytest.fixture +def parser(): + spdx_parser = Parser() + spdx_parser.build() + return spdx_parser + + +def test_relationship(parser): + relationship_str = '\n'.join([ + 'Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-File', + 'RelationshipComment: This is a comment.']) + + document = parser.parse("\n".join([DOCUMENT_STR, relationship_str])) + assert document is not None + relationship = document.relationships[0] + assert relationship.relationship_type == RelationshipType.DESCRIBES + assert relationship.related_spdx_element_id == "SPDXRef-File" + assert relationship.spdx_element_id == "SPDXRef-DOCUMENT" + assert relationship.comment == "This is a comment." + + +@pytest.mark.parametrize("relationship_str, expected_message", + [("Relationship: spdx_id DESCRIBES", "Relationship couldn't be split"), + ("Relationship: spdx_id IS spdx_id", "Invalid RelationshipType IS. Line: 1")]) +def test_falsy_relationship(parser, relationship_str, expected_message): + with pytest.raises(SPDXParsingError, match=expected_message): + parser.parse(relationship_str) diff --git a/tests/spdx/parser/tagvalue/test_snippet_parser.py b/tests/spdx/parser/tagvalue/test_snippet_parser.py new file mode 100644 index 000000000..95f766e52 --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_snippet_parser.py @@ -0,0 +1,54 @@ +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +from license_expression import get_spdx_licensing + +from spdx.parser.tagvalue.parser.tagvalue import Parser +from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR + + +@pytest.fixture +def parser(): + spdx_parser = Parser() + spdx_parser.build() + return spdx_parser + + +def test_snippet(parser): + snippet_str = '\n'.join([ + 'SnippetSPDXID: SPDXRef-Snippet', + 'SnippetLicenseComments: Some lic comment.', + 'SnippetCopyrightText: Copyright 2008-2010 John Smith ', + 'SnippetComment: Some snippet comment.', + 'SnippetName: from linux kernel', + 'SnippetFromFileSPDXID: SPDXRef-DoapSource', + 'SnippetLicenseConcluded: Apache-2.0', + 'LicenseInfoInSnippet: Apache-2.0', + 'SnippetByteRange: 310:420', + 'SnippetLineRange: 5:23', + ]) + + document = parser.parse("\n".join([DOCUMENT_STR, snippet_str])) + assert document is not None + assert len(document.snippets) == 1 + snippet = document.snippets[0] + assert snippet.spdx_id == 'SPDXRef-Snippet' + assert snippet.name == 'from linux kernel' + assert snippet.comment == 'Some snippet comment.' + assert snippet.copyright_text == ' Copyright 2008-2010 John Smith ' + assert snippet.license_comment == 'Some lic comment.' + assert snippet.file_spdx_id == 'SPDXRef-DoapSource' + assert snippet.license_concluded == get_spdx_licensing().parse('Apache-2.0') + assert snippet.license_info_in_snippet == [get_spdx_licensing().parse('Apache-2.0')] + assert snippet.byte_range[0] == 310 + assert snippet.byte_range[1] == 420 + assert snippet.line_range[0] == 5 + assert snippet.line_range[1] == 23 diff --git a/tests/spdx/parser/tagvalue/test_tag_value_parser.py b/tests/spdx/parser/tagvalue/test_tag_value_parser.py index 3893b829c..b4228ebc7 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_parser.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_parser.py @@ -9,125 +9,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
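
With parse() now raising instead of returning a message list, callers see either a fully constructed Document or a single SPDXParsingError carrying the accumulated logger messages. A caller-side sketch (the wrapper name is illustrative; it assumes SPDXParsingError.get_messages() returns that message list, as its use in construct_current_element() suggests):

    from spdx.parser.error import SPDXParsingError
    from spdx.parser.tagvalue.parser.tagvalue import Parser

    def parse_tag_value(text: str):
        parser = Parser()
        parser.build()
        try:
            return parser.parse(text)   # Document on success
        except SPDXParsingError as err:
            for message in err.get_messages():
                print(message)          # or route to proper logging
            raise
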
-import sys from datetime import datetime from unittest import TestCase import pytest -from license_expression import get_spdx_licensing from spdx.model.actor import Actor, ActorType -from spdx.model.annotation import AnnotationType from spdx.model.checksum import Checksum, ChecksumAlgorithm from spdx.model.external_document_ref import ExternalDocumentRef -from spdx.model.file import FileType -from spdx.model.package import PackagePurpose, ExternalPackageRefCategory, ExternalPackageRef -from spdx.model.relationship import RelationshipType from spdx.model.version import Version from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser -document_str = '\n'.join([ - 'SPDXVersion: SPDX-2.3', - 'DataLicense: CC0-1.0', - 'DocumentName: Sample_Document-V2.3', - 'SPDXID: SPDXRef-DOCUMENT', - 'DocumentComment: Sample Comment', - 'DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301', - 'ExternalDocumentRef: DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759' -]) - -creation_str = '\n'.join([ - 'Creator: Person: Bob (bob@example.com)', - 'Creator: Organization: Acme.', - 'Created: 2010-02-03T00:00:00Z', - 'CreatorComment: Sample Comment', - 'LicenseListVersion: 3.17' -]) - -package_str = '\n'.join([ - 'PackageName: Test', - 'SPDXID: SPDXRef-Package', - 'PackageVersion: Version 0.9.2', - 'PackageDownloadLocation: http://example.com/test', - 'FilesAnalyzed: True', - 'PackageSummary: Test package', - 'PackageSourceInfo: Version 1.0 of test', - 'PackageFileName: test-1.0.zip', - 'PackageSupplier: Organization:ACME', - 'PackageOriginator: Organization:ACME', - 'PackageChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', - 'PackageVerificationCode: 4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)', - 'PackageDescription: A package.', - 'PackageComment: Comment on the package.', - 'PackageCopyrightText: Copyright 2014 Acme Inc.', - 'PackageLicenseDeclared: Apache-2.0', - 'PackageLicenseConcluded: (LicenseRef-2.0 and Apache-2.0)', - 'PackageLicenseInfoFromFiles: Apache-1.0', - 'PackageLicenseInfoFromFiles: Apache-2.0', - 'PackageLicenseComments: License Comments', - 'ExternalRef: SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:', - 'ExternalRefComment: Some comment about the package.', - 'ExternalRef: OTHER LocationRef-acmeforge acmecorp/acmenator/4.1.3-alpha', - 'PrimaryPackagePurpose: OPERATING-SYSTEM', - 'BuiltDate: 2020-01-01T12:00:00Z', - 'ReleaseDate: 2021-01-01T12:00:00Z', - 'ValidUntilDate: 2022-01-01T12:00:00Z' -]) - -file_str = '\n'.join([ - 'FileName: testfile.java', - 'SPDXID: SPDXRef-File', - 'FileType: SOURCE', - 'FileType: TEXT', - 'FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', - 'LicenseConcluded: Apache-2.0', - 'LicenseInfoInFile: Apache-2.0', - 'FileCopyrightText: Copyright 2014 Acme Inc.', - 'FileComment: Very long file', - 'FileAttributionText: Acknowledgements that might be required to be communicated in some contexts.' 
-]) - -snippet_str = '\n'.join([ - 'SnippetSPDXID: SPDXRef-Snippet', - 'SnippetLicenseComments: Some lic comment.', - 'SnippetCopyrightText: Copyright 2008-2010 John Smith ', - 'SnippetComment: Some snippet comment.', - 'SnippetName: from linux kernel', - 'SnippetFromFileSPDXID: SPDXRef-DoapSource', - 'SnippetLicenseConcluded: Apache-2.0', - 'LicenseInfoInSnippet: Apache-2.0', - 'SnippetByteRange: 310:420', - 'SnippetLineRange: 5:23', -]) - -annotation_str = '\n'.join([ - 'Annotator: Person: Jane Doe()', - 'AnnotationDate: 2010-01-29T18:30:22Z', - 'AnnotationComment: Document level annotation', - 'AnnotationType: OTHER', - 'SPDXREF: SPDXRef-DOCUMENT' -]) - -relationship_str = '\n'.join([ - 'Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-File', - 'RelationshipComment: This is a comment.']) - -extracted_licensing_info_str = '\n'.join([ - 'LicenseID: LicenseRef-Beerware-4.2', - 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' - 'LicenseName: Beer-Ware License (Version 42)', - 'LicenseCrossReference: http://people.freebsd.org/~phk/', - 'LicenseComment: The beerware license has a couple of other standard variants.' -]) - -unknown_tag_str = 'UnknownTag: This is an example for an unknown tag.' - -complete_str = '{0}\n{1}\n{2}\n{3}\n{4}\n{5}\n{6}\n{7}\n'.format(document_str, creation_str, file_str, - annotation_str, - relationship_str, snippet_str, package_str, - extracted_licensing_info_str) - @pytest.fixture def parser(): @@ -136,122 +29,8 @@ def parser(): return spdx_parser -def test_creation_info(parser): - document = parser.parse(complete_str) - assert document is not None - creation_info = document.creation_info - assert creation_info is not None - assert creation_info.spdx_version == "SPDX-2.3" - assert creation_info.data_license == 'CC0-1.0' - assert creation_info.name == 'Sample_Document-V2.3' - assert creation_info.spdx_id == 'SPDXRef-DOCUMENT' - assert creation_info.document_comment == 'Sample Comment' - assert creation_info.document_namespace == 'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301' - TestCase().assertCountEqual(creation_info.creators, - [Actor(ActorType.PERSON, "Bob", "bob@example.com"), - Actor(ActorType.ORGANIZATION, "Acme.")]) - assert creation_info.creator_comment == 'Sample Comment' - assert creation_info.created == datetime(2010, 2, 3, 0, 0) - assert creation_info.license_list_version == Version(3, 17) - TestCase().assertCountEqual(creation_info.external_document_refs, - [ExternalDocumentRef("DocumentRef-spdx-tool-1.2", - "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", - Checksum(ChecksumAlgorithm.SHA1, - "d6a770ba38583ed4bb4525bd96e50461655d2759"))]) - - -def test_extracted_licensing_info(parser): - document = parser.parse(complete_str) - assert document is not None - assert len(document.extracted_licensing_info) == 1 - extracted_licensing_info = document.extracted_licensing_info[0] - assert extracted_licensing_info.license_id == "LicenseRef-Beerware-4.2" - assert extracted_licensing_info.extracted_text == '"THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. 
If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' - assert extracted_licensing_info.license_name == "Beer-Ware License (Version 42)" - assert extracted_licensing_info.cross_references == ["http://people.freebsd.org/~phk/"] - assert extracted_licensing_info.comment == "The beerware license has a couple of other standard variants." - - -def test_package(parser): - document = parser.parse(complete_str) - assert document is not None - package = document.packages[0] - assert package.name == 'Test' - assert package.spdx_id == 'SPDXRef-Package' - assert package.version == 'Version 0.9.2' - assert len(package.license_info_from_files) == 2 - assert package.license_concluded == get_spdx_licensing().parse('LicenseRef-2.0 AND Apache-2.0') - assert package.files_analyzed is True - assert package.comment == 'Comment on the package.' - assert len(package.external_references) == 2 - TestCase().assertCountEqual(package.external_references, - [ExternalPackageRef(ExternalPackageRefCategory.SECURITY, "cpe23Type", - "cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:", - "Some comment about the package."), - ExternalPackageRef(ExternalPackageRefCategory.OTHER, "LocationRef-acmeforge", - "acmecorp/acmenator/4.1.3-alpha")]) - assert package.primary_package_purpose == PackagePurpose.OPERATING_SYSTEM - assert package.built_date == datetime(2020, 1, 1, 12, 0, 0) - assert package.release_date == datetime(2021, 1, 1, 12, 0, 0) - assert package.valid_until_date == datetime(2022, 1, 1, 12, 0, 0) - - -def test_file(parser): - document = parser.parse(complete_str) - assert document is not None - assert len(document.files) == 1 - spdx_file = document.files[0] - assert spdx_file.name == 'testfile.java' - assert spdx_file.spdx_id == 'SPDXRef-File' - assert spdx_file.file_type == [FileType.SOURCE, FileType.TEXT] - assert spdx_file.comment == 'Very long file' - assert spdx_file.attribution_texts == ['Acknowledgements that might be required to be communicated in ' \ - 'some contexts.'] - assert spdx_file.license_info_in_file == [get_spdx_licensing().parse("Apache-2.0")] - assert spdx_file.license_concluded == get_spdx_licensing().parse("Apache-2.0") - - -def test_annotation(parser): - document = parser.parse(complete_str) - assert document is not None - assert len(document.annotations) == 1 - annotation = document.annotations[0] - assert annotation.annotator.name == 'Jane Doe' - assert annotation.annotation_date == datetime(2010, 1, 29, 18, 30, 22) - assert annotation.annotation_comment == 'Document level annotation' - assert annotation.annotation_type == AnnotationType.OTHER - assert annotation.spdx_id == 'SPDXRef-DOCUMENT' - - -def test_relationship(parser): - document = parser.parse(complete_str) - assert document is not None - relationship = document.relationships[0] - assert relationship.relationship_type == RelationshipType.DESCRIBES - assert relationship.related_spdx_element_id == "SPDXRef-File" - assert relationship.spdx_element_id == "SPDXRef-DOCUMENT" - assert relationship.comment == "This is a comment." - - -def test_snippet(parser): - document = parser.parse(complete_str) - assert document is not None - assert len(document.snippets) == 1 - snippet = document.snippets[0] - assert snippet.spdx_id == 'SPDXRef-Snippet' - assert snippet.name == 'from linux kernel' - assert snippet.comment == 'Some snippet comment.' - assert snippet.copyright_text == ' Copyright 2008-2010 John Smith ' - assert snippet.license_comment == 'Some lic comment.' 
- assert snippet.file_spdx_id == 'SPDXRef-DoapSource' - assert snippet.license_concluded == get_spdx_licensing().parse('Apache-2.0') - assert snippet.license_info_in_snippet == [get_spdx_licensing().parse('Apache-2.0')] - assert snippet.byte_range[0] == 310 - assert snippet.byte_range[1] == 420 - assert snippet.line_range[0] == 5 - assert snippet.line_range[1] == 23 - - def test_unknown_str(parser): + unknown_tag_str = 'UnknownTag: This is an example for an unknown tag.' + with pytest.raises(SPDXParsingError, match="Unknown tag"): parser.parse(unknown_tag_str) From 7b96706e9451f22c6e1122e28cc28b8accaabab7 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 09:21:42 +0100 Subject: [PATCH 07/43] [issue-382] add test and specify error message Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 2 +- .../spdx/parser/tagvalue/test_file_parser.py | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index a0198f149..248fb5ed8 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -374,7 +374,7 @@ def p_file_type_1(self, p): @grammar_rule("file_type : FILE_TYPE error") def p_file_type_2(self, p): self.logger.append( - f"Error while parsing FileType: Token did not match specified grammar rule. Line: {p.lineno(1)}") + f"Error while parsing FileType: Token did not match any of the valid values. Line: {p.lineno(1)}") @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") def p_file_checksum_1(self, p): diff --git a/tests/spdx/parser/tagvalue/test_file_parser.py b/tests/spdx/parser/tagvalue/test_file_parser.py index dab3eeedc..90eb1e6a8 100644 --- a/tests/spdx/parser/tagvalue/test_file_parser.py +++ b/tests/spdx/parser/tagvalue/test_file_parser.py @@ -11,6 +11,7 @@ import pytest from license_expression import get_spdx_licensing +from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR @@ -49,3 +50,26 @@ def test_file(parser): 'Acknowledgements that might be required to be communicated in some contexts.'] assert spdx_file.license_info_in_file == [get_spdx_licensing().parse("Apache-2.0")] assert spdx_file.license_concluded == get_spdx_licensing().parse("Apache-2.0") + + +def test_invalid_file(parser): + file_str = '\n'.join([ + 'FileName: testfile.java', + 'SPDXID: SPDXRef-File', + 'FileType: SOUCE', + 'FileType: TEXT', + 'FileChecksum: SHA3: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', + 'LicenseConcluded: Apache-2.0', + 'LicenseInfoInFile: Apache-2.0', + 'FileCopyrightText: Copyright 2014 Acme Inc.', + 'FileComment: Very long file', + 'FileAttributionText: Acknowledgements that might be required to be communicated in some contexts.' + ]) + + with pytest.raises(SPDXParsingError) as err: + parser.parse(file_str) + + assert err.value.get_messages() == ['Error while parsing FileType: Token did not match specified grammar rule. ' + 'Line: 3', + 'Error while parsing Checksum in file: Token did not match specified grammar ' + 'rule. 
Line: 5'] From 589466ea384bea55e6c05e41342efee729b31d04 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 12:20:00 +0100 Subject: [PATCH 08/43] [issue-382] refactor relationship_parser Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 22 ++++++------ .../tagvalue/test_relationship_parser.py | 34 +++++++++++++------ 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 248fb5ed8..579a16d81 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -65,7 +65,7 @@ def p_start_2(self, p): # attributes for annotation "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n" # attributes for relationship - "| relationship\n| relationship_comment\n" + "| relationship\n" # attributes for snippet "| snip_spdx_id\n| snip_name\n| snip_comment\n| snippet_attribution_text\n| snip_cr_text\n" "| snip_lic_comment\n| snip_file_spdx_id\n| snip_lics_conc\n| snip_lics_info\n| snip_byte_range\n" @@ -847,22 +847,29 @@ def p_annotation_spdx_id_2(self, p): f"Line: {p.lineno(1)}") # parsing methods for relationship - @grammar_rule("relationship : RELATIONSHIP relationship_value") + @grammar_rule("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n " + "| RELATIONSHIP relationship_value") def p_relationship_1(self, p): + self.construct_current_element() try: spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ") except ValueError: self.logger.append(f"Relationship couldn't be split in spdx_element_id, relationship_type and " f"related_spdx_element. Line: {p.lineno(1)}") return - self.construct_current_element() self.current_element["class"] = Relationship try: self.current_element["relationship_type"] = RelationshipType[relationship_type] except KeyError: self.logger.append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}") + if related_spdx_element_id == "NONE": + related_spdx_element_id = SpdxNone() + if related_spdx_element_id == "NOASSERTION": + related_spdx_element_id = SpdxNoAssertion() self.current_element["related_spdx_element_id"] = related_spdx_element_id self.current_element["spdx_element_id"] = spdx_element_id + if len(p) == 5: + self.current_element["comment"] = p[4] @grammar_rule("relationship : RELATIONSHIP error") def p_relationship_2(self, p): @@ -879,15 +886,6 @@ def p_relationship_value_without_doc_ref(self, p): p[0] = p[1] - @grammar_rule("relationship_comment : RELATIONSHIP_COMMENT text_or_line") - def p_relationship_comment_1(self, p): - self.current_element["comment"] = p[2] - - @grammar_rule("relationship_comment : RELATIONSHIP_COMMENT error") - def p_relationship_comment_2(self, p): - self.logger.append( - f"Error while parsing RelationshipComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - def p_error(self, p): pass diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py index ba18ea88c..12fc37ecd 100644 --- a/tests/spdx/parser/tagvalue/test_relationship_parser.py +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -10,7 +10,9 @@ # limitations under the License. 
import pytest -from spdx.model.relationship import RelationshipType +from spdx.model.relationship import RelationshipType, Relationship +from spdx.model.spdx_no_assertion import SpdxNoAssertion +from spdx.model.spdx_none import SpdxNone from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR @@ -23,23 +25,33 @@ def parser(): return spdx_parser -def test_relationship(parser): - relationship_str = '\n'.join([ - 'Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-File', - 'RelationshipComment: This is a comment.']) - +@pytest.mark.parametrize("relationship_str, expected_relationship", + [('\n'.join(['Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-File', + 'RelationshipComment: This is a comment.']), + Relationship("SPDXRef-DOCUMENT", RelationshipType.DESCRIBES, + "SPDXRef-File", "This is a comment.")), + ('Relationship: SPDXRef-DOCUMENT PATCH_FOR NOASSERTION', + Relationship("SPDXRef-DOCUMENT", RelationshipType.PATCH_FOR, + SpdxNoAssertion())), + ('Relationship: SPDXRef-CarolCompression DEPENDS_ON NONE', + Relationship("SPDXRef-CarolCompression", RelationshipType.DEPENDS_ON, SpdxNone())), + ('Relationship: DocumentRef-ExternalDocument: SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef', + Relationship("DocumentRef-ExternalDocument:SPDXRef-Test", RelationshipType.DEPENDS_ON, + "DocumentRef:AnotherRef")) + ]) +def test_relationship(parser, relationship_str, expected_relationship): document = parser.parse("\n".join([DOCUMENT_STR, relationship_str])) assert document is not None relationship = document.relationships[0] - assert relationship.relationship_type == RelationshipType.DESCRIBES - assert relationship.related_spdx_element_id == "SPDXRef-File" - assert relationship.spdx_element_id == "SPDXRef-DOCUMENT" - assert relationship.comment == "This is a comment." + assert relationship == expected_relationship @pytest.mark.parametrize("relationship_str, expected_message", [("Relationship: spdx_id DESCRIBES", "Relationship couldn't be split"), - ("Relationship: spdx_id IS spdx_id", "Invalid RelationshipType IS. Line: 1")]) + ("Relationship: spdx_id IS spdx_id", "Invalid RelationshipType IS. Line: 1"), + ("Relationship: spdx_id IS spdx_id\nRelationshipComment: SOURCE", + "Error while parsing Relationship: Token did not match specified grammar rule. 
Line: 1") + ]) def test_falsy_relationship(parser, relationship_str, expected_message): with pytest.raises(SPDXParsingError, match=expected_message): parser.parse(relationship_str) From 7bf3cb8c6fde8bdb9c4fd23bbb0ee255a2c9792f Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 14:32:53 +0100 Subject: [PATCH 09/43] [issue-382] use individual logger for current_element Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 220 ++++++++++-------- .../spdx/parser/tagvalue/test_file_parser.py | 7 +- 2 files changed, 128 insertions(+), 99 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 579a16d81..30b4ac368 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -45,8 +45,8 @@ def __init__(self): self.tokens = SPDXLexer.tokens self.logger = Logger() self.element_stack = [] - self.current_element = dict() - self.creation_info = dict() + self.current_element = {"logger": Logger()} + self.creation_info = {"logger": Logger()} self.elements_build = dict() @grammar_rule("start : start attrib ") @@ -144,7 +144,7 @@ def p_lics_list_ver_1(self, p): @grammar_rule("lics_list_ver : LIC_LIST_VER error") def p_lics_list_ver_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing LicenseListVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("doc_comment : DOC_COMMENT text_or_line") @@ -153,7 +153,7 @@ def p_doc_comment_1(self, p): @grammar_rule("doc_comment : DOC_COMMENT error") def p_doc_comment_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing DocumentComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("doc_namespace : DOC_NAMESPACE LINE") @@ -162,7 +162,7 @@ def p_doc_namespace_1(self, p): @grammar_rule("doc_namespace : DOC_NAMESPACE error") def p_doc_namespace_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing DocumentNamespace: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("data_lics : DOC_LICENSE LINE") @@ -171,7 +171,7 @@ def p_data_license_1(self, p): @grammar_rule("data_lics : DOC_LICENSE error") def p_data_license_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing DataLicense: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("doc_name : DOC_NAME LINE") @@ -180,7 +180,7 @@ def p_doc_name_1(self, p): @grammar_rule("doc_name : DOC_NAME error") def p_doc_name_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing DocumentName: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("ext_doc_ref : EXT_DOC_REF DOC_REF_ID DOC_URI EXT_DOC_REF_CHECKSUM") @@ -196,7 +196,7 @@ def p_ext_doc_refs_1(self, p): @grammar_rule("ext_doc_ref : EXT_DOC_REF error") def p_ext_doc_refs_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing ExternalDocumentRef: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("spdx_version : DOC_VERSION LINE") @@ -205,7 +205,7 @@ def p_spdx_version_1(self, p): @grammar_rule("spdx_version : DOC_VERSION error") def p_spdx_version_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing SPDXVersion: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("creator_comment : CREATOR_COMMENT text_or_line") @@ -214,7 +214,7 @@ def p_creator_comment_1(self, p): @grammar_rule("creator_comment : CREATOR_COMMENT error") def p_creator_comment_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing CreatorComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") def p_creator_1(self, p): @@ -223,7 +223,7 @@ def p_creator_1(self, p): @grammar_rule("creator : CREATOR error") def p_creator_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing Creator: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("created : CREATED DATE") @@ -232,7 +232,7 @@ def p_created_1(self, p): @grammar_rule("created : CREATED error") def p_created_2(self, p): - self.logger.append( + self.creation_info["logger"].append( f"Error while parsing Created: Token did not match specified grammar rule. Line: {p.lineno(1)}") # parsing methods for extracted licensing info @@ -245,7 +245,7 @@ def p_extr_lic_id_1(self, p): @grammar_rule("extr_lic_id : LICS_ID error") def p_extr_lic_id_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing LicenseID: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("lic_xref : LICS_CRS_REF LINE") @@ -255,8 +255,9 @@ def p_lic_xref_1(self, p): @grammar_rule("lic_xref : LICS_CRS_REF error") def p_lic_xref_2(self, p): - self.logger.append(f"Error while parsing LicenseCrossReference: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing LicenseCrossReference: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("lic_comment : LICS_COMMENT text_or_line") def p_lic_comment_1(self, p): @@ -264,7 +265,7 @@ def p_lic_comment_1(self, p): @grammar_rule("lic_comment : LICS_COMMENT error") def p_lic_comment_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing LicenseComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("extr_lic_name : LICS_NAME line_or_no_assertion") @@ -273,7 +274,7 @@ def p_extr_lic_name_1(self, p): @grammar_rule("extr_lic_name : LICS_NAME error") def p_extr_lic_name_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing LicenseName: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("extr_lic_text : LICS_TEXT text_or_line") @@ -282,7 +283,7 @@ def p_extr_lic_text_1(self, p): @grammar_rule("extr_lic_text : LICS_TEXT error") def p_extr_lic_text_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing ExtractedText: Token did not match specified grammar rule. Line: {p.lineno(1)}") # parsing methods for file @@ -291,14 +292,13 @@ def p_extr_lic_text_2(self, p): def p_file_name_1(self, p): self.construct_current_element() self.element_stack.append(p[2]) - self.current_element = dict() self.current_element["name"] = p[2] self.current_element["class"] = File @grammar_rule("file_name : FILE_NAME error") def p_file_name_2(self, p): - self.logger.append( - f"Error while parsing FileName: Token did not match specified grammar rule. Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("file_contrib : FILE_CONTRIB LINE") def p_file_contrib_1(self, p): @@ -306,7 +306,7 @@ def p_file_contrib_1(self, p): @grammar_rule("file_contrib : FILE_CONTRIB error") def p_file_contrib_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing FileContributor: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_notice : FILE_NOTICE text_or_line") @@ -315,7 +315,7 @@ def p_file_notice_1(self, p): @grammar_rule("file_notice : FILE_NOTICE error") def p_file_notice_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing FileNotice: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none") @@ -325,7 +325,7 @@ def p_file_cr_text_1(self, p): @grammar_rule("file_cr_text : FILE_CR_TEXT error") def p_file_cr_text_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing FileCopyrightText: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_lics_comment : FILE_LICS_COMMENT text_or_line") @@ -334,8 +334,9 @@ def p_file_lics_comment_1(self, p): @grammar_rule("file_lics_comment : FILE_LICS_COMMENT error") def p_file_lics_comment_2(self, p): - self.logger.append(f"Error while parsing LicenseComments in file: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing LicenseComments in file: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line") def p_file_attribution_text_1(self, p): @@ -343,7 +344,7 @@ def p_file_attribution_text_1(self, p): @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT error") def p_file_attribution_text_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing FileAttributionText: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_lics_info : FILE_LICS_INFO license_or_no_assertion_or_none") @@ -355,7 +356,7 @@ def p_file_lics_info_1(self, p): @grammar_rule("file_lics_info : FILE_LICS_INFO error") def p_file_lics_info_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing LicenseInfoInFile: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_comment : FILE_COMMENT text_or_line") @@ -364,7 +365,7 @@ def p_file_comment_1(self, p): @grammar_rule("file_comment : FILE_COMMENT error") def p_file_comment_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing FileComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_type : FILE_TYPE file_type_value") @@ -373,7 +374,7 @@ def p_file_type_1(self, p): @grammar_rule("file_type : FILE_TYPE error") def p_file_type_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing FileType: Token did not match any of the valid values. Line: {p.lineno(1)}") @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") @@ -385,7 +386,7 @@ def p_file_checksum_1(self, p): @grammar_rule("file_checksum : FILE_CHECKSUM error") def p_file_checksum_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing Checksum in file: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("file_conc : FILE_LICS_CONC license_or_no_assertion_or_none") @@ -394,8 +395,9 @@ def p_file_conc_1(self, p): @grammar_rule("file_conc : FILE_LICS_CONC error") def p_file_conc_2(self, p): - self.logger.append(f"Error while parsing LicenseConcluded in file: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing LicenseConcluded in file: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule( "file_type_value : SOURCE\n| BINARY\n| ARCHIVE\n | APPLICATION\n | AUDIO\n | IMAGE\n | FILETYPE_TEXT\n| VIDEO\n" @@ -409,13 +411,14 @@ def p_file_type_value(self, p): @grammar_rule("package_name : PKG_NAME LINE") def p_package_name(self, p): self.construct_current_element() + self.element_stack.push("package") self.current_element["class"] = Package self.current_element["name"] = p[2] @grammar_rule("package_name : PKG_NAME error") def p_package_name_1(self, p): - self.logger.append( - f"Error while parsing PackageName: Token did not match specified grammar rule. Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_desc : PKG_DESC text_or_line") def p_pkg_desc_1(self, p): @@ -423,7 +426,7 @@ def p_pkg_desc_1(self, p): @grammar_rule("pkg_desc : PKG_DESC error") def p_pkg_desc_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageDescription: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_comment : PKG_COMMENT text_or_line") @@ -432,7 +435,7 @@ def p_pkg_comment_1(self, p): @grammar_rule("pkg_comment : PKG_COMMENT error") def p_pkg_comment_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line") @@ -441,8 +444,9 @@ def p_pkg_attribution_text_1(self, p): @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT error") def p_pkg_attribution_text_2(self, p): - self.logger.append(f"Error while parsing PackageAttributionText: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing PackageAttributionText: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("pkg_summary : PKG_SUM text_or_line") def p_pkg_summary_1(self, p): @@ -450,7 +454,7 @@ def p_pkg_summary_1(self, p): @grammar_rule("pkg_summary : PKG_SUM error") def p_pkg_summary_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageSummary: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none") @@ -459,8 +463,9 @@ def p_pkg_cr_text_1(self, p): @grammar_rule("pkg_cr_text : PKG_CPY_TEXT error") def p_pkg_cr_text_2(self, p): - self.logger.append(f"Error while parsing PackageCopyrightText: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing PackageCopyrightText: Token did not match specified grammar rule. 
" + f"Line: {p.lineno(1)}") @grammar_rule("pkg_ext_ref : PKG_EXT_REF LINE PKG_EXT_REF_COMMENT text_or_line\n | PKG_EXT_REF LINE") def p_pkg_ext_refs_1(self, p): @@ -474,7 +479,7 @@ def p_pkg_ext_refs_1(self, p): @grammar_rule("pkg_ext_ref : PKG_EXT_REF error") def p_pkg_ext_refs_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing ExternalRef in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @@ -484,8 +489,9 @@ def p_pkg_lic_comment_1(self, p): @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT error") def p_pkg_lic_comment_2(self, p): - self.logger.append(f"Error while parsing PackageLicenseComments: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing PackageLicenseComments: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none") def p_pkg_lic_decl_1(self, p): @@ -493,7 +499,7 @@ def p_pkg_lic_decl_1(self, p): @grammar_rule("pkg_lic_decl : PKG_LICS_DECL error") def p_pkg_lic_decl_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing LicenseDeclared in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @@ -506,7 +512,7 @@ def p_pkg_lic_ff_1(self, p): @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE error") def p_pkg_lic_ff_error(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing LicenseInfoFromFiles in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @@ -516,7 +522,7 @@ def p_pkg_lic_conc_1(self, p): @grammar_rule("pkg_lic_conc : PKG_LICS_CONC error") def p_pkg_lic_conc_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing LicenseConcluded in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @@ -526,7 +532,7 @@ def p_pkg_src_info_1(self, p): @grammar_rule("pkg_src_info : PKG_SRC_INFO error") def p_pkg_src_info_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageSourceInfo: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") @@ -539,7 +545,7 @@ def p_pkg_checksum_1(self, p): @grammar_rule("pkg_checksum : PKG_CHECKSUM error") def p_pkg_checksum_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageChecksum: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_verif : PKG_VERF_CODE LINE") @@ -556,8 +562,9 @@ def p_pkg_verif_1(self, p): @grammar_rule("pkg_verif : PKG_VERF_CODE error") def p_pkg_verif_2(self, p): - self.logger.append(f"Error while parsing PackageVerificationCode: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing PackageVerificationCode: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("pkg_home : PKG_HOME line_or_no_assertion_or_none") def p_pkg_home_1(self, p): @@ -565,7 +572,7 @@ def p_pkg_home_1(self, p): @grammar_rule("pkg_home : PKG_HOME error") def p_pkg_home_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageHomePage: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("pkg_down_location : PKG_DOWN line_or_no_assertion_or_none") @@ -574,8 +581,9 @@ def p_pkg_down_location_1(self, p): @grammar_rule("pkg_down_location : PKG_DOWN error") def p_pkg_down_location_2(self, p): - self.logger.append(f"Error while parsing PackageDownloadLocation: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing PackageDownloadLocation: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("pkg_files_analyzed : PKG_FILES_ANALYZED LINE") def p_pkg_files_analyzed_1(self, p): @@ -586,8 +594,9 @@ def p_pkg_files_analyzed_1(self, p): @grammar_rule("pkg_files_analyzed : PKG_FILES_ANALYZED error") def p_pkg_files_analyzed_2(self, p): - self.logger.append(f"Error while parsing FilesAnalyzed in package: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing FilesAnalyzed in package: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("pkg_orig : PKG_ORIG pkg_supplier_values") def p_pkg_orig_1(self, p): @@ -595,7 +604,7 @@ def p_pkg_orig_1(self, p): @grammar_rule("pkg_orig : PKG_ORIG error") def p_pkg_orig_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageOriginator: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_supplier : PKG_SUPPL pkg_supplier_values") @@ -604,7 +613,7 @@ def p_pkg_supplier_1(self, p): @grammar_rule("pkg_supplier : PKG_SUPPL error") def p_pkg_supplier_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageSupplier: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_supplier_values : NO_ASSERTION") @@ -621,7 +630,7 @@ def p_pkg_file_name(self, p): @grammar_rule("pkg_file_name : PKG_FILE_NAME error") def p_pkg_file_name_1(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageFileName: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("package_version : PKG_VERSION LINE") @@ -630,7 +639,7 @@ def p_package_version_1(self, p): @grammar_rule("package_version : PKG_VERSION error") def p_package_version_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing PackageVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE primary_package_purpose_value") @@ -640,8 +649,9 @@ def p_primary_package_purpose_1(self, p): @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error") def p_primary_package_purpose_2(self, p): - self.logger.append(f"Error while parsing PrimaryPackagePurpose: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing PrimaryPackagePurpose: Token did not match specified grammar rule. 
" + f"Line: {p.lineno(1)}") @grammar_rule("primary_package_purpose_value : APPLICATION\n | FRAMEWORK\n | LIBRARY\n | CONTAINER\n " "| OPERATING_SYSTEM \n | DEVICE \n| FIRMWARE\n | SOURCE\n | ARCHIVE\n | FILE\n | INSTALL\n | OTHER") @@ -654,7 +664,7 @@ def p_built_date_1(self, p): @grammar_rule("built_date : BUILT_DATE error") def p_built_date_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing BuiltDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("release_date : RELEASE_DATE DATE") @@ -663,7 +673,7 @@ def p_release_date_1(self, p): @grammar_rule("release_date : RELEASE_DATE error") def p_release_date_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing ReleaseDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("valid_until_date : VALID_UNTIL_DATE DATE") @@ -672,7 +682,7 @@ def p_valid_until_date_1(self, p): @grammar_rule("valid_until_date : VALID_UNTIL_DATE error") def p_valid_until_date_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing ValidUntilDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") # parsing methods for snippet @@ -684,7 +694,7 @@ def p_snip_spdx_id(self, p): @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID error") def p_snip_spdx_id_1(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing SnippetSPDXID: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("snip_name : SNIPPET_NAME LINE") @@ -693,7 +703,7 @@ def p_snippet_name(self, p): @grammar_rule("snip_name : SNIPPET_NAME error") def p_snippet_name_1(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing SnippetName: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("snip_comment : SNIPPET_COMMENT text_or_line") @@ -702,7 +712,7 @@ def p_snippet_comment(self, p): @grammar_rule("snip_comment : SNIPPET_COMMENT error") def p_snippet_comment_1(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing SnippetComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line") @@ -711,8 +721,9 @@ def p_snippet_attribution_text_1(self, p): @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error") def p_snippet_attribution_text_2(self, p): - self.logger.append(f"Error while parsing SnippetAttributionText: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing SnippetAttributionText: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none") def p_snippet_cr_text(self, p): @@ -720,8 +731,9 @@ def p_snippet_cr_text(self, p): @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT error") def p_snippet_cr_text_1(self, p): - self.logger.append(f"Error while parsing SnippetCopyrightText: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing SnippetCopyrightText: Token did not match specified grammar rule. 
" + f"Line: {p.lineno(1)}") @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT text_or_line") def p_snippet_lic_comment(self, p): @@ -729,8 +741,9 @@ def p_snippet_lic_comment(self, p): @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT error") def p_snippet_lic_comment_1(self, p): - self.logger.append(f"Error while parsing SnippetLicenseComments: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing SnippetLicenseComments: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID LINE") def p_snip_from_file_spdxid(self, p): @@ -738,8 +751,9 @@ def p_snip_from_file_spdxid(self, p): @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID error") def p_snip_from_file_spdxid_1(self, p): - self.logger.append(f"Error while parsing SnippetFromFileSPDXID: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing SnippetFromFileSPDXID: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC license_or_no_assertion_or_none") def p_snippet_concluded_license(self, p): @@ -747,8 +761,9 @@ def p_snippet_concluded_license(self, p): @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC error") def p_snippet_concluded_license_1(self, p): - self.logger.append(f"Error while parsing SnippetLicenseConcluded: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing SnippetLicenseConcluded: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO license_or_no_assertion_or_none") def p_snippet_lics_info(self, p): @@ -760,8 +775,9 @@ def p_snippet_lics_info(self, p): @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO error") def p_snippet_lics_info_1(self, p): - self.logger.append(f"Error while parsing LicenseInfoInSnippet: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing LicenseInfoInSnippet: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE") def p_snippet_byte_range(self, p): @@ -776,7 +792,7 @@ def p_snippet_byte_range(self, p): @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE error") def p_snippet_byte_range_1(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing SnippetByteRange: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE LINE") @@ -791,7 +807,7 @@ def p_snippet_line_range(self, p): @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE error") def p_snippet_line_range_1(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing SnippetLineRange: Token did not match specified grammar rule. Line: {p.lineno(1)}") # parsing methods for annotation @@ -803,8 +819,8 @@ def p_annotator_1(self, p): @grammar_rule("annotator : ANNOTATOR error") def p_annotator_2(self, p): - self.logger.append( - f"Error while parsing Annotator: Token did not match specified grammar rule. Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("annotation_date : ANNOTATION_DATE DATE") def p_annotation_date_1(self, p): @@ -812,7 +828,7 @@ def p_annotation_date_1(self, p): @grammar_rule("annotation_date : ANNOTATION_DATE error") def p_annotation_date_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing AnnotationDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("annotation_comment : ANNOTATION_COMMENT text_or_line") @@ -821,7 +837,7 @@ def p_annotation_comment_1(self, p): @grammar_rule("annotation_comment : ANNOTATION_COMMENT error") def p_annotation_comment_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing AnnotationComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("annotation_type : ANNOTATION_TYPE annotation_type_value") @@ -830,7 +846,7 @@ def p_annotation_type_1(self, p): @grammar_rule("annotation_type : ANNOTATION_TYPE error") def p_annotation_type_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing AnnotationType: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("annotation_type_value : OTHER\n| REVIEW") @@ -843,8 +859,9 @@ def p_annotation_spdx_id_1(self, p): @grammar_rule("annotation_spdx_id : ANNOTATION_SPDX_ID error") def p_annotation_spdx_id_2(self, p): - self.logger.append(f"Error while parsing SPDXREF in annotation: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Error while parsing SPDXREF in annotation: Token did not match specified grammar rule. " + f"Line: {p.lineno(1)}") # parsing methods for relationship @grammar_rule("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n " @@ -854,14 +871,15 @@ def p_relationship_1(self, p): try: spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ") except ValueError: - self.logger.append(f"Relationship couldn't be split in spdx_element_id, relationship_type and " - f"related_spdx_element. Line: {p.lineno(1)}") + self.current_element["logger"].append( + f"Relationship couldn't be split in spdx_element_id, relationship_type and " + f"related_spdx_element. Line: {p.lineno(1)}") return self.current_element["class"] = Relationship try: self.current_element["relationship_type"] = RelationshipType[relationship_type] except KeyError: - self.logger.append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}") + self.current_element["logger"].append(f"Invalid RelationshipType {relationship_type}. Line: {p.lineno(1)}") if related_spdx_element_id == "NONE": related_spdx_element_id = SpdxNone() if related_spdx_element_id == "NOASSERTION": @@ -873,7 +891,7 @@ def p_relationship_1(self, p): @grammar_rule("relationship : RELATIONSHIP error") def p_relationship_2(self, p): - self.logger.append( + self.current_element["logger"].append( f"Error while parsing Relationship: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("relationship_value : DOC_REF_ID LINE") @@ -897,6 +915,10 @@ def build(self, **kwargs): def parse(self, text): self.yacc.parse(text, lexer=self.lex) self.construct_current_element() + try: + raise_parsing_error_if_logger_has_messages(self.creation_info.pop("logger"), "CreationInfo") + except SPDXParsingError as err: + self.logger.append(err.get_messages()) raise_parsing_error_if_logger_has_messages(self.logger) creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info) self.elements_build["creation_info"] = creation_info @@ -905,14 +927,21 @@ def parse(self, text): def construct_current_element(self): if "class" not in self.current_element: + self.current_element = {"logger": Logger()} return class_name = self.current_element.pop("class") + try: + raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), class_name.__name__) + except SPDXParsingError as err: + self.logger.append(err.get_messages()) + self.current_element = {"logger": Logger()} + return try: self.elements_build.setdefault(CLASS_MAPPING[class_name.__name__], []).append( construct_or_raise_parsing_error(class_name, self.current_element)) except SPDXParsingError as err: self.logger.append(err.get_messages()) - self.current_element = dict() + self.current_element = {"logger": Logger()} def check_that_current_element_matches_class_for_value(self, expected_class): if expected_class != self.current_element["class"]: @@ -922,3 +951,4 @@ def check_that_current_element_matches_class_for_value(self, expected_class): CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", Package="packages", ExtractedLicensingInfo="extracted_licensing_info") + diff --git a/tests/spdx/parser/tagvalue/test_file_parser.py b/tests/spdx/parser/tagvalue/test_file_parser.py index 90eb1e6a8..15ca0fa59 100644 --- a/tests/spdx/parser/tagvalue/test_file_parser.py +++ b/tests/spdx/parser/tagvalue/test_file_parser.py @@ -69,7 +69,6 @@ def test_invalid_file(parser): with pytest.raises(SPDXParsingError) as err: parser.parse(file_str) - assert err.value.get_messages() == ['Error while parsing FileType: Token did not match specified grammar rule. ' - 'Line: 3', - 'Error while parsing Checksum in file: Token did not match specified grammar ' - 'rule. Line: 5'] + assert err.value.get_messages() == [["Error while parsing File: ['Error while parsing FileType: Token did not " + "match any of the valid values. Line: 3', 'Error while parsing Checksum in " + "file: Token did not match specified grammar rule. Line: 5']"]] From 4d48b6e2e2859835ee0690be55405fbc3e11fc62 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 15:09:16 +0100 Subject: [PATCH 10/43] [issue-382] add helper method to start a new current element Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 41 +++++++++++-------- .../tagvalue/test_relationship_parser.py | 14 +++++-- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 30b4ac368..1f6bc1566 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -11,6 +11,7 @@ # limitations under the License. 
import re +from typing import Any from license_expression import get_spdx_licensing from ply import yacc @@ -239,12 +240,12 @@ def p_created_2(self, p): @grammar_rule("extr_lic_id : LICS_ID LINE") def p_extr_lic_id_1(self, p): - self.construct_current_element() - self.current_element["class"] = ExtractedLicensingInfo + self.initialize_new_current_element(ExtractedLicensingInfo) self.current_element["license_id"] = p[2] @grammar_rule("extr_lic_id : LICS_ID error") def p_extr_lic_id_2(self, p): + self.initialize_new_current_element(ExtractedLicensingInfo) self.current_element["logger"].append( f"Error while parsing LicenseID: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -290,13 +291,12 @@ def p_extr_lic_text_2(self, p): @grammar_rule("file_name : FILE_NAME LINE") def p_file_name_1(self, p): - self.construct_current_element() - self.element_stack.append(p[2]) + self.initialize_new_current_element(File) self.current_element["name"] = p[2] - self.current_element["class"] = File @grammar_rule("file_name : FILE_NAME error") def p_file_name_2(self, p): + self.initialize_new_current_element(File) self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -320,7 +320,6 @@ def p_file_notice_2(self, p): @grammar_rule("file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none") def p_file_cr_text_1(self, p): - self.current_element["copyright_text"] = p[2] @grammar_rule("file_cr_text : FILE_CR_TEXT error") @@ -410,13 +409,14 @@ def p_file_type_value(self, p): @grammar_rule("package_name : PKG_NAME LINE") def p_package_name(self, p): - self.construct_current_element() - self.element_stack.push("package") - self.current_element["class"] = Package + self.initialize_new_current_element(Package) self.current_element["name"] = p[2] @grammar_rule("package_name : PKG_NAME error") def p_package_name_1(self, p): + self.initialize_new_current_element(Package) + self.construct_current_element() + self.current_element["class"] = Package self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -688,12 +688,12 @@ def p_valid_until_date_2(self, p): # parsing methods for snippet @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID LINE") def p_snip_spdx_id(self, p): - self.construct_current_element() - self.current_element["class"] = Snippet + self.initialize_new_current_element(Snippet) self.current_element["spdx_id"] = p[2] @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID error") def p_snip_spdx_id_1(self, p): + self.initialize_new_current_element(Snippet) self.current_element["logger"].append( f"Error while parsing SnippetSPDXID: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @@ -813,12 +813,15 @@ def p_snippet_line_range_1(self, p): # parsing methods for annotation def p_annotator_1(self, p): """annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE""" - self.construct_current_element() - self.current_element["annotator"] = ActorParser.parse_actor(p[2]) - self.current_element["class"] = Annotation + self.initialize_new_current_element(Annotation) + try: + self.current_element["annotator"] = ActorParser.parse_actor(p[2]) + except SPDXParsingError as err: + self.current_element["logger"].append(err.get_messages()) @grammar_rule("annotator : ANNOTATOR error") def p_annotator_2(self, p): + self.initialize_new_current_element(Annotation) self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -867,7 +870,7 @@ def p_annotation_spdx_id_2(self, p): @grammar_rule("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP relationship_value") def p_relationship_1(self, p): - self.construct_current_element() + self.initialize_new_current_element(Relationship) try: spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ") except ValueError: @@ -875,7 +878,6 @@ def p_relationship_1(self, p): f"Relationship couldn't be split in spdx_element_id, relationship_type and " f"related_spdx_element. Line: {p.lineno(1)}") return - self.current_element["class"] = Relationship try: self.current_element["relationship_type"] = RelationshipType[relationship_type] except KeyError: @@ -891,6 +893,7 @@ def p_relationship_1(self, p): @grammar_rule("relationship : RELATIONSHIP error") def p_relationship_2(self, p): + self.initialize_new_current_element(Relationship) self.current_element["logger"].append( f"Error while parsing Relationship: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -948,6 +951,12 @@ def check_that_current_element_matches_class_for_value(self, expected_class): raise SPDXParsingError(["Unexpected current element for value"]) # what to do now? exit parsing + def initialize_new_current_element(self, class_name: Any): + if "class" in self.current_element and "spdx_id" in self.current_element: + self.element_stack.append({self.current_element["class"]: self.current_element["spdx_id"]}) + self.construct_current_element() + self.current_element["class"] = class_name + CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", Package="packages", ExtractedLicensingInfo="extracted_licensing_info") diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py index 12fc37ecd..5247ad4c5 100644 --- a/tests/spdx/parser/tagvalue/test_relationship_parser.py +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -47,11 +47,17 @@ def test_relationship(parser, relationship_str, expected_relationship): @pytest.mark.parametrize("relationship_str, expected_message", - [("Relationship: spdx_id DESCRIBES", "Relationship couldn't be split"), - ("Relationship: spdx_id IS spdx_id", "Invalid RelationshipType IS. Line: 1"), + [("Relationship: spdx_id DESCRIBES", + [['Error while parsing Relationship: ["Relationship couldn\'t be split in spdx_element_id, ' + 'relationship_type and related_spdx_element. Line: 1"]']]), + ("Relationship: spdx_id IS spdx_id", + [["Error while parsing Relationship: ['Invalid RelationshipType IS. 
Line: 1']"]]), ("Relationship: spdx_id IS spdx_id\nRelationshipComment: SOURCE", - "Error while parsing Relationship: Token did not match specified grammar rule. Line: 1") + [["Error while parsing Relationship: ['Error while parsing Relationship: Token " + "did not match specified grammar rule. Line: 1']"]]) ]) def test_falsy_relationship(parser, relationship_str, expected_message): - with pytest.raises(SPDXParsingError, match=expected_message): + with pytest.raises(SPDXParsingError) as err: parser.parse(relationship_str) + + assert err.value.get_messages() == expected_message From 5cf98f83b8fa387977beefa44d2a5560e11244a7 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 15:35:47 +0100 Subject: [PATCH 11/43] [issue-382] add test to parse whole document Signed-off-by: Meret Behrens --- .../parser/tagvalue/test_tag_value_parser.py | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/spdx/parser/tagvalue/test_tag_value_parser.py b/tests/spdx/parser/tagvalue/test_tag_value_parser.py index b4228ebc7..edd0e8aef 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_parser.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_parser.py @@ -9,15 +9,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from datetime import datetime -from unittest import TestCase +import os import pytest -from spdx.model.actor import Actor, ActorType -from spdx.model.checksum import Checksum, ChecksumAlgorithm -from spdx.model.external_document_ref import ExternalDocumentRef -from spdx.model.version import Version +from spdx.model.document import Document from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser @@ -34,3 +30,18 @@ def test_unknown_str(parser): with pytest.raises(SPDXParsingError, match="Unknown tag"): parser.parse(unknown_tag_str) + + +def test_parse_file(parser): + fn = os.path.join(os.path.dirname(__file__), "../../data/formats/SPDXTagExample-v2.3.spdx") + + with open(fn) as f: + data = f.read() + doc = parser.parse(data) + assert type(doc) == Document + assert len(doc.annotations) == 5 + assert len(doc.files) == 5 + assert len(doc.packages) == 4 + assert len(doc.snippets) == 1 + assert len(doc.relationships) == 13 + assert len(doc.extracted_licensing_info) == 5 From f26aac08b85d682d6ac83bdab39c64384506aa47 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 15:37:31 +0100 Subject: [PATCH 12/43] [issue-382] add function to parse_checksum Signed-off-by: Meret Behrens --- .../parser/tagvalue/parser/helper_methods.py | 22 +++++++++++ src/spdx/parser/tagvalue/parser/tagvalue.py | 37 ++++++++++--------- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/helper_methods.py b/src/spdx/parser/tagvalue/parser/helper_methods.py index 32090810a..63690a829 100644 --- a/src/spdx/parser/tagvalue/parser/helper_methods.py +++ b/src/spdx/parser/tagvalue/parser/helper_methods.py @@ -11,6 +11,11 @@ import re from typing import Optional +from spdx.model.checksum import Checksum, ChecksumAlgorithm +from spdx.parser.error import SPDXParsingError +from spdx.parser.logger import Logger +from spdx.parser.parsing_functions import construct_or_raise_parsing_error + def grammar_rule(doc): # this is a helper method to use decorators for the parsing methods instead of docstrings @@ -29,3 +34,20 @@ def str_from_text(text: Optional[str]) -> 
Optional[str]: return text else: return None + + +def parse_checksum(logger: Logger, checksum_str: str) -> Optional[Checksum]: + try: + algorithm, value = checksum_str.split(":") + except ValueError: + logger.append( + f"Couldn't split value for checksum in algorithm and value.") + return None + algorithm = ChecksumAlgorithm[algorithm.upper().replace("-", "_")] + value = value.strip() + try: + checksum = construct_or_raise_parsing_error(Checksum, {"algorithm": algorithm, "value": value}) + except SPDXParsingError as err: + logger.append(err.get_messages()) + checksum = None + return checksum diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 1f6bc1566..316d8ba71 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -18,7 +18,6 @@ from spdx.datetime_conversions import datetime_from_str from spdx.model.annotation import AnnotationType, Annotation -from spdx.model.checksum import ChecksumAlgorithm, Checksum from spdx.model.external_document_ref import ExternalDocumentRef from spdx.model.extracted_licensing_info import ExtractedLicensingInfo from spdx.model.package import Package, PackageVerificationCode, PackagePurpose, ExternalPackageRef, \ @@ -36,7 +35,7 @@ from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.logger import Logger from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer -from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text +from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text, parse_checksum class Parser(object): @@ -189,10 +188,8 @@ def p_ext_doc_refs_1(self, p): document_ref_id = p[2] document_uri = p[3] - splitted_checksum = p[4].split(":") - algorithm = ChecksumAlgorithm[splitted_checksum[0]] - value = splitted_checksum[1].strip() - external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, Checksum(algorithm, value)) + checksum = parse_checksum(self.current_element["logger"], p[4]) + external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum) self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) @grammar_rule("ext_doc_ref : EXT_DOC_REF error") @@ -378,10 +375,8 @@ def p_file_type_2(self, p): @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") def p_file_checksum_1(self, p): - splitted_checksum = p[2].split(":") - algorithm = ChecksumAlgorithm[splitted_checksum[0]] - value = splitted_checksum[1] - self.current_element.setdefault("checksums", []).append(Checksum(algorithm, value)) + checksum = parse_checksum(self.current_element["logger"], p[2]) + self.current_element.setdefault("checksums", []).append(checksum) @grammar_rule("file_checksum : FILE_CHECKSUM error") def p_file_checksum_2(self, p): @@ -473,8 +468,20 @@ def p_pkg_ext_refs_1(self, p): comment = None if len(p) == 5: comment = p[4] - external_package_ref = ExternalPackageRef(ExternalPackageRefCategory[category], reference_type, locator, - comment) + try: + category = ExternalPackageRefCategory[category.replace("-", "_")] + except KeyError: + self.current_element["logger"].append(f"Invalid ExternalPackageRefCategory: {category}") + return + try: + external_package_ref = construct_or_raise_parsing_error(ExternalPackageRef, + {"category": category, + "reference_type": reference_type, + "locator": locator, + "comment": comment}) + except SPDXParsingError as err: + 
self.current_element["logger"].append(err.get_messages()) + return self.current_element.setdefault("external_references", []).append(external_package_ref) @grammar_rule("pkg_ext_ref : PKG_EXT_REF error") @@ -537,10 +544,7 @@ def p_pkg_src_info_2(self, p): @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") def p_pkg_checksum_1(self, p): - split_checksum = p[2].split(":") - algorithm = ChecksumAlgorithm[split_checksum[0]] - value = split_checksum[1].strip() - checksum = Checksum(algorithm, value) + checksum = parse_checksum(self.current_element["logger"], p[2]) self.current_element.setdefault("checksums", []).append(checksum) @grammar_rule("pkg_checksum : PKG_CHECKSUM error") @@ -960,4 +964,3 @@ def initialize_new_current_element(self, class_name: Any): CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", Package="packages", ExtractedLicensingInfo="extracted_licensing_info") - From cbbeb2bb7337990c754f5b5bfc4cfb7ce1c1f0f6 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 15:59:49 +0100 Subject: [PATCH 13/43] [issue-382] add error handling and tests for creation_info_parser Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 7 +++-- .../tagvalue/test_creation_info_parser.py | 28 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 316d8ba71..4ce6b8e03 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -140,8 +140,10 @@ def p_spdx_id(self, p): @grammar_rule("lics_list_ver : LIC_LIST_VER LINE") def p_lics_list_ver_1(self, p): - self.creation_info["license_list_version"] = Version.from_string(p[2]) - + try: + self.creation_info["license_list_version"] = Version.from_string(p[2]) + except ValueError as err: + self.creation_info["logger"].append(err.args[0]) @grammar_rule("lics_list_ver : LIC_LIST_VER error") def p_lics_list_ver_2(self, p): self.creation_info["logger"].append( @@ -185,7 +187,6 @@ def p_doc_name_2(self, p): @grammar_rule("ext_doc_ref : EXT_DOC_REF DOC_REF_ID DOC_URI EXT_DOC_REF_CHECKSUM") def p_ext_doc_refs_1(self, p): - document_ref_id = p[2] document_uri = p[3] checksum = parse_checksum(self.current_element["logger"], p[4]) diff --git a/tests/spdx/parser/tagvalue/test_creation_info_parser.py b/tests/spdx/parser/tagvalue/test_creation_info_parser.py index 3aca7aca9..bd430f020 100644 --- a/tests/spdx/parser/tagvalue/test_creation_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_creation_info_parser.py @@ -17,6 +17,7 @@ from spdx.model.checksum import Checksum, ChecksumAlgorithm from spdx.model.external_document_ref import ExternalDocumentRef from spdx.model.version import Version +from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser DOCUMENT_STR = '\n'.join([ @@ -64,3 +65,30 @@ def test_creation_info(parser): "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", Checksum(ChecksumAlgorithm.SHA1, "d6a770ba38583ed4bb4525bd96e50461655d2759"))]) + + +@pytest.mark.parametrize("document_str, expected_message", + ([('\n'.join( + ['SPDXVersion: SPDX-2.3', 'DataLicense: CC0-1.0', 'DocumentName: Sample_Document-V2.3', + 'SPDXID: SPDXRef-DOCUMENT', 'DocumentComment: Sample Comment', + 'DocumentNamespace: Sample Comment', + 'ExternalDocumentRef: DocumentRef-spdx-tool-1.2:htp://spdx.org:SHA1: 
d6a770ba38583ed4bb4525bd96e50461655d2759', + 'Creator: Person Bob (bob@example.com)', 'Creator: Organization: Acme [email]', + 'Created: 2010-02-03T00:00:0Z', 'CreatorComment: Sample Comment', + 'LicenseListVersion: 7']), + [["Error while parsing CreationInfo: " + "['Error while parsing DocumentNamespace: Token did not match specified grammar rule. " + "Line: 6', 'Error while parsing ExternalDocumentRef: " + "Token did not match specified grammar rule. Line: 7', 'Error while parsing Creator: " + "Token did not match specified grammar rule. Line: 8', 'Error while parsing Created: " + "Token did not match specified grammar rule. Line: 10', '7 is not a valid version string']"]]), + ('\n'.join( + ['SPDXVersion: SPDX-2.3', 'DataLicense: CC0-1.0', 'DocumentName: Sample_Document-V2.3', + 'SPDXID: SPDXRef-DOCUMENT']), + ['Error while constructing CreationInfo: CreationInfo.__init__() missing 3 ' + "required positional arguments: 'document_namespace', 'creators', and " + "'created'"])])) +def test_invalid_creation_info(parser, document_str, expected_message): + with pytest.raises(SPDXParsingError) as err: + parser.parse(document_str) + assert err.value.get_messages() == expected_message From 815e7793e1be09bdfed3783e40c662cde696cf84 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Tue, 28 Feb 2023 16:15:16 +0100 Subject: [PATCH 14/43] [issue-382] add contains relationships Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 4ce6b8e03..0cd3dafbc 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -144,6 +144,7 @@ def p_lics_list_ver_1(self, p): self.creation_info["license_list_version"] = Version.from_string(p[2]) except ValueError as err: self.creation_info["logger"].append(err.args[0]) + @grammar_rule("lics_list_ver : LIC_LIST_VER error") def p_lics_list_ver_2(self, p): self.creation_info["logger"].append( @@ -398,7 +399,6 @@ def p_file_conc_2(self, p): "file_type_value : SOURCE\n| BINARY\n| ARCHIVE\n | APPLICATION\n | AUDIO\n | IMAGE\n | FILETYPE_TEXT\n| VIDEO\n" " | DOCUMENTATION\n| SPDX \n| OTHER ") def p_file_type_value(self, p): - p[0] = p[1] # parsing methods for package @@ -947,6 +947,8 @@ def construct_current_element(self): try: self.elements_build.setdefault(CLASS_MAPPING[class_name.__name__], []).append( construct_or_raise_parsing_error(class_name, self.current_element)) + if class_name == File: + self.check_for_preceding_package_and_build_contains_relationship() except SPDXParsingError as err: self.logger.append(err.get_messages()) self.current_element = {"logger": Logger()} @@ -962,6 +964,15 @@ def initialize_new_current_element(self, class_name: Any): self.construct_current_element() self.current_element["class"] = class_name + def check_for_preceding_package_and_build_contains_relationship(self): + file_spdx_id = self.current_element["spdx_id"] + if "packages" not in self.elements_build: + return + package_spdx_id = self.elements_build["packages"][-1].spdx_id + relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id) + if relationship not in self.elements_build["relationships"]: + self.elements_build.setdefault("relationships", []).append(relationship) + CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", Package="packages", 
ExtractedLicensingInfo="extracted_licensing_info") From c486af0261096f86ce569c94501a8a6733855abe Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 08:49:52 +0100 Subject: [PATCH 15/43] [issue-382] fix example Signed-off-by: Meret Behrens --- tests/spdx/data/formats/SPDXTagExample-v2.3.spdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/spdx/data/formats/SPDXTagExample-v2.3.spdx b/tests/spdx/data/formats/SPDXTagExample-v2.3.spdx index ca3906159..b84a97a76 100644 --- a/tests/spdx/data/formats/SPDXTagExample-v2.3.spdx +++ b/tests/spdx/data/formats/SPDXTagExample-v2.3.spdx @@ -334,6 +334,7 @@ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. LicenseName: CyberNeko License -LicenseCrossReference: http://people.apache.org/~andyc/neko/LICENSE, http://justasample.url.com +LicenseCrossReference: http://people.apache.org/~andyc/neko/LICENSE +LicenseCrossReference: http://justasample.url.com LicenseComment: This is tye CyperNeko License From 1975cbf91e790dd27ac748b793fc138649bcef27 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 08:56:47 +0100 Subject: [PATCH 16/43] [refactor] build parser when initializing class Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 35 +++++++++++-------- .../parser/tagvalue/test_annotation_parser.py | 15 ++------ .../tagvalue/test_creation_info_parser.py | 13 +++---- .../test_extracted_licensing_info_parser.py | 12 ++----- .../spdx/parser/tagvalue/test_file_parser.py | 16 +++------ .../parser/tagvalue/test_package_parser.py | 11 ++---- .../tagvalue/test_relationship_parser.py | 13 +++---- .../parser/tagvalue/test_snippet_parser.py | 11 ++---- .../parser/tagvalue/test_tag_value_parser.py | 13 +++---- 9 files changed, 46 insertions(+), 93 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 0cd3dafbc..a38c9855f 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -11,43 +11,53 @@ # limitations under the License. 
import re -from typing import Any +from typing import Any, List, Dict from license_expression import get_spdx_licensing from ply import yacc +from ply.yacc import LRParser from spdx.datetime_conversions import datetime_from_str from spdx.model.annotation import AnnotationType, Annotation +from spdx.model.document import Document, CreationInfo from spdx.model.external_document_ref import ExternalDocumentRef from spdx.model.extracted_licensing_info import ExtractedLicensingInfo +from spdx.model.file import File, FileType from spdx.model.package import Package, PackageVerificationCode, PackagePurpose, ExternalPackageRef, \ ExternalPackageRefCategory from spdx.model.relationship import Relationship, RelationshipType from spdx.model.snippet import Snippet -from spdx.model.version import Version -from spdx.parser.actor_parser import ActorParser - -from spdx.model.document import Document, CreationInfo -from spdx.model.file import File, FileType from spdx.model.spdx_no_assertion import SpdxNoAssertion from spdx.model.spdx_none import SpdxNone +from spdx.model.version import Version +from spdx.parser.actor_parser import ActorParser from spdx.parser.error import SPDXParsingError -from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.logger import Logger +from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text, parse_checksum class Parser(object): - def __init__(self): - self.lex = None - self.yacc = None + tokens: List[str] + logger: Logger + element_stack: List[Dict[str, str]] + current_element: Dict[str, Any] + creation_info: Dict[str, Any] + elements_build: Dict[str, Any] + lex: SPDXLexer + yacc: LRParser + + def __init__(self, **kwargs): self.tokens = SPDXLexer.tokens self.logger = Logger() self.element_stack = [] self.current_element = {"logger": Logger()} self.creation_info = {"logger": Logger()} self.elements_build = dict() + self.lex = SPDXLexer() + self.lex.build(reflags=re.UNICODE) + self.yacc = yacc.yacc(module=self, **kwargs) @grammar_rule("start : start attrib ") def p_start_1(self, p): @@ -915,11 +925,6 @@ def p_relationship_value_without_doc_ref(self, p): def p_error(self, p): pass - def build(self, **kwargs): - self.lex = SPDXLexer() - self.lex.build(reflags=re.UNICODE) - self.yacc = yacc.yacc(module=self, **kwargs) - def parse(self, text): self.yacc.parse(text, lexer=self.lex) self.construct_current_element() diff --git a/tests/spdx/parser/tagvalue/test_annotation_parser.py b/tests/spdx/parser/tagvalue/test_annotation_parser.py index 8fbcc2f9b..b40cd6575 100644 --- a/tests/spdx/parser/tagvalue/test_annotation_parser.py +++ b/tests/spdx/parser/tagvalue/test_annotation_parser.py @@ -10,22 +10,13 @@ # limitations under the License. 
from datetime import datetime -import pytest - +from spdx.model.annotation import AnnotationType from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -from spdx.model.annotation import AnnotationType - - -@pytest.fixture -def parser(): - spdx_parser = Parser() - spdx_parser.build() - return spdx_parser - -def test_annotation(parser): +def test_annotation(): + parser = Parser() annotation_str = '\n'.join([ 'Annotator: Person: Jane Doe()', 'AnnotationDate: 2010-01-29T18:30:22Z', diff --git a/tests/spdx/parser/tagvalue/test_creation_info_parser.py b/tests/spdx/parser/tagvalue/test_creation_info_parser.py index bd430f020..6d9e4d427 100644 --- a/tests/spdx/parser/tagvalue/test_creation_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_creation_info_parser.py @@ -36,14 +36,8 @@ ]) -@pytest.fixture -def parser(): - spdx_parser = Parser() - spdx_parser.build() - return spdx_parser - - -def test_creation_info(parser): +def test_creation_info(): + parser = Parser() document = parser.parse(DOCUMENT_STR) assert document is not None creation_info = document.creation_info @@ -88,7 +82,8 @@ def test_creation_info(parser): ['Error while constructing CreationInfo: CreationInfo.__init__() missing 3 ' "required positional arguments: 'document_namespace', 'creators', and " "'created'"])])) -def test_invalid_creation_info(parser, document_str, expected_message): +def test_invalid_creation_info(document_str, expected_message): + parser = Parser() with pytest.raises(SPDXParsingError) as err: parser.parse(document_str) assert err.value.get_messages() == expected_message diff --git a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py index 7197b0676..7761a202e 100644 --- a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py @@ -8,20 +8,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import pytest - from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -@pytest.fixture -def parser(): - spdx_parser = Parser() - spdx_parser.build() - return spdx_parser - - -def test_extracted_licensing_info(parser): +def test_extracted_licensing_info(): + parser = Parser() extracted_licensing_info_str = '\n'.join([ 'LicenseID: LicenseRef-Beerware-4.2', 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. 
If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' diff --git a/tests/spdx/parser/tagvalue/test_file_parser.py b/tests/spdx/parser/tagvalue/test_file_parser.py index 15ca0fa59..ec5a9df5d 100644 --- a/tests/spdx/parser/tagvalue/test_file_parser.py +++ b/tests/spdx/parser/tagvalue/test_file_parser.py @@ -11,21 +11,14 @@ import pytest from license_expression import get_spdx_licensing +from spdx.model.file import FileType from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -from spdx.model.file import FileType - - -@pytest.fixture -def parser(): - spdx_parser = Parser() - spdx_parser.build() - return spdx_parser - -def test_file(parser): +def test_file(): + parser = Parser() file_str = '\n'.join([ 'FileName: testfile.java', 'SPDXID: SPDXRef-File', @@ -52,7 +45,8 @@ def test_file(parser): assert spdx_file.license_concluded == get_spdx_licensing().parse("Apache-2.0") -def test_invalid_file(parser): +def test_invalid_file(): + parser = Parser() file_str = '\n'.join([ 'FileName: testfile.java', 'SPDXID: SPDXRef-File', diff --git a/tests/spdx/parser/tagvalue/test_package_parser.py b/tests/spdx/parser/tagvalue/test_package_parser.py index 4de7ffd41..c17516610 100644 --- a/tests/spdx/parser/tagvalue/test_package_parser.py +++ b/tests/spdx/parser/tagvalue/test_package_parser.py @@ -11,7 +11,6 @@ from datetime import datetime from unittest import TestCase -import pytest from license_expression import get_spdx_licensing from spdx.model.package import ExternalPackageRef, ExternalPackageRefCategory, PackagePurpose @@ -19,14 +18,8 @@ from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -@pytest.fixture -def parser(): - spdx_parser = Parser() - spdx_parser.build() - return spdx_parser - - -def test_package(parser): +def test_package(): + parser = Parser() package_str = '\n'.join([ 'PackageName: Test', 'SPDXID: SPDXRef-Package', diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py index 5247ad4c5..c60e7eb76 100644 --- a/tests/spdx/parser/tagvalue/test_relationship_parser.py +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -18,13 +18,6 @@ from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -@pytest.fixture -def parser(): - spdx_parser = Parser() - spdx_parser.build() - return spdx_parser - - @pytest.mark.parametrize("relationship_str, expected_relationship", [('\n'.join(['Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-File', 'RelationshipComment: This is a comment.']), @@ -39,7 +32,8 @@ def parser(): Relationship("DocumentRef-ExternalDocument:SPDXRef-Test", RelationshipType.DEPENDS_ON, "DocumentRef:AnotherRef")) ]) -def test_relationship(parser, relationship_str, expected_relationship): +def test_relationship(relationship_str, expected_relationship): + parser = Parser() document = parser.parse("\n".join([DOCUMENT_STR, relationship_str])) assert document is not None relationship = document.relationships[0] @@ -56,7 +50,8 @@ def test_relationship(parser, relationship_str, expected_relationship): [["Error while parsing Relationship: ['Error while parsing Relationship: Token " "did not match specified grammar rule. 
Line: 1']"]]) ]) -def test_falsy_relationship(parser, relationship_str, expected_message): +def test_falsy_relationship(relationship_str, expected_message): + parser = Parser() with pytest.raises(SPDXParsingError) as err: parser.parse(relationship_str) diff --git a/tests/spdx/parser/tagvalue/test_snippet_parser.py b/tests/spdx/parser/tagvalue/test_snippet_parser.py index 95f766e52..80a10bd40 100644 --- a/tests/spdx/parser/tagvalue/test_snippet_parser.py +++ b/tests/spdx/parser/tagvalue/test_snippet_parser.py @@ -8,21 +8,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import pytest from license_expression import get_spdx_licensing from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -@pytest.fixture -def parser(): - spdx_parser = Parser() - spdx_parser.build() - return spdx_parser - - -def test_snippet(parser): +def test_snippet(): + parser = Parser() snippet_str = '\n'.join([ 'SnippetSPDXID: SPDXRef-Snippet', 'SnippetLicenseComments: Some lic comment.', diff --git a/tests/spdx/parser/tagvalue/test_tag_value_parser.py b/tests/spdx/parser/tagvalue/test_tag_value_parser.py index edd0e8aef..af88aab53 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_parser.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_parser.py @@ -18,21 +18,16 @@ from spdx.parser.tagvalue.parser.tagvalue import Parser -@pytest.fixture -def parser(): - spdx_parser = Parser() - spdx_parser.build() - return spdx_parser - - -def test_unknown_str(parser): +def test_unknown_str(): + parser = Parser() unknown_tag_str = 'UnknownTag: This is an example for an unknown tag.' 
with pytest.raises(SPDXParsingError, match="Unknown tag"): parser.parse(unknown_tag_str) -def test_parse_file(parser): +def test_parse_file(): + parser = Parser() fn = os.path.join(os.path.dirname(__file__), "../../data/formats/SPDXTagExample-v2.3.spdx") with open(fn) as f: From 0fce282e13c891d3934d3d178cfd5bc3d2d4b4db Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 09:05:40 +0100 Subject: [PATCH 17/43] [issue-382] add tag-value parser to cli tool Signed-off-by: Meret Behrens --- src/spdx/parser/parse_anything.py | 3 ++- .../parser/tagvalue/parser/tagvalue_parser.py | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 src/spdx/parser/tagvalue/parser/tagvalue_parser.py diff --git a/src/spdx/parser/parse_anything.py b/src/spdx/parser/parse_anything.py index 34b15d1dd..b2d1dfd87 100644 --- a/src/spdx/parser/parse_anything.py +++ b/src/spdx/parser/parse_anything.py @@ -11,6 +11,7 @@ from spdx.formats import file_name_to_format, FileFormat from spdx.parser.json import json_parser from spdx.parser.rdf import rdf_parser +from spdx.parser.tagvalue.parser import tagvalue_parser from spdx.parser.xml import xml_parser from spdx.parser.yaml import yaml_parser @@ -20,7 +21,7 @@ def parse_file(file_name: str): if input_format == FileFormat.RDF_XML: return rdf_parser.parse_from_file(file_name) elif input_format == FileFormat.TAG_VALUE: - raise NotImplementedError("Currently, the tag-value parser is not implemented") + return tagvalue_parser.parse_from_file(file_name) elif input_format == FileFormat.JSON: return json_parser.parse_from_file(file_name) elif input_format == FileFormat.XML: diff --git a/src/spdx/parser/tagvalue/parser/tagvalue_parser.py b/src/spdx/parser/tagvalue/parser/tagvalue_parser.py new file mode 100644 index 000000000..ba4a53ead --- /dev/null +++ b/src/spdx/parser/tagvalue/parser/tagvalue_parser.py @@ -0,0 +1,20 @@ +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from spdx.model.document import Document +from spdx.parser.tagvalue.parser.tagvalue import Parser + + +def parse_from_file(file_name: str) -> Document: + parser = Parser() + with open(file_name) as file: + data = file.read() + document: Document = parser.parse(data) + return document From d56aedc74a0e125e41af5d6e1522b1220895028a Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 09:32:49 +0100 Subject: [PATCH 18/43] [fix] raise error if name is an empty string in actor_parser Signed-off-by: Meret Behrens --- src/spdx/parser/actor_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/spdx/parser/actor_parser.py b/src/spdx/parser/actor_parser.py index 28c5ad960..7d6406eba 100644 --- a/src/spdx/parser/actor_parser.py +++ b/src/spdx/parser/actor_parser.py @@ -29,14 +29,20 @@ def parse_actor(actor: str) -> Actor: if tool_match: name: str = tool_match.group(1).strip() + if not name: + raise SPDXParsingError([f"No name for Tool provided: {actor}."]) creator = construct_or_raise_parsing_error(Actor, dict(actor_type=ActorType.TOOL, name=name)) elif person_match: name: str = person_match.group(1).strip() + if not name: + raise SPDXParsingError([f"No name for Person provided: {actor}."]) email: Optional[str] = ActorParser.get_email_or_none(person_match) creator = construct_or_raise_parsing_error(Actor, dict(actor_type=ActorType.PERSON, name=name, email=email)) elif org_match: name: str = org_match.group(1).strip() + if not name: + raise SPDXParsingError([f"No name for Organization provided: {actor}."]) email: Optional[str] = ActorParser.get_email_or_none(org_match) creator = construct_or_raise_parsing_error(Actor, dict(actor_type=ActorType.ORGANIZATION, name=name, email=email)) From dc4b2bec260fa68e9e2c738f1beafcb621357116 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 11:25:06 +0100 Subject: [PATCH 19/43] [issue-382] check that current element matches class of parsed value Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 84 +++++++++++++++++---- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index a38c9855f..78caa36ac 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -37,6 +37,11 @@ from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text, parse_checksum +CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", + Package="packages", ExtractedLicensingInfo="extracted_licensing_info") +ELEMENT_EXPECTED_START_TAG = dict(File="FileName", Annotation="Annotator", Relationship="Relationship", + Snippet="SnippetSPDXID", Package="PackageName", ExtractedLicensingInfo="LicenseID") + class Parser(object): tokens: List[str] @@ -254,7 +259,6 @@ def p_extr_lic_id_1(self, p): @grammar_rule("extr_lic_id : LICS_ID error") def p_extr_lic_id_2(self, p): - self.initialize_new_current_element(ExtractedLicensingInfo) self.current_element["logger"].append( f"Error while parsing LicenseID: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @@ -271,6 +275,7 @@ def p_lic_xref_2(self, p): @grammar_rule("lic_comment : LICS_COMMENT text_or_line") def p_lic_comment_1(self, p): + self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) self.current_element["comment"] = p[2] @grammar_rule("lic_comment : LICS_COMMENT error") @@ -280,6 +285,7 @@ def p_lic_comment_2(self, p): @grammar_rule("extr_lic_name : LICS_NAME line_or_no_assertion") def p_extr_lic_name_1(self, p): + self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) self.current_element["license_name"] = p[2] @grammar_rule("extr_lic_name : LICS_NAME error") @@ -289,6 +295,7 @@ def p_extr_lic_name_2(self, p): @grammar_rule("extr_lic_text : LICS_TEXT text_or_line") def p_extr_lic_text_1(self, p): + self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) self.current_element["extracted_text"] = p[2] @grammar_rule("extr_lic_text : LICS_TEXT error") @@ -311,6 +318,7 @@ def p_file_name_2(self, p): @grammar_rule("file_contrib : FILE_CONTRIB LINE") def p_file_contrib_1(self, p): + self.check_that_current_element_matches_class_for_value(File) self.current_element.setdefault("contributors", []).append(p[2]) @grammar_rule("file_contrib : FILE_CONTRIB error") @@ -320,6 +328,7 @@ def p_file_contrib_2(self, p): @grammar_rule("file_notice : FILE_NOTICE text_or_line") def p_file_notice_1(self, p): + self.check_that_current_element_matches_class_for_value(File) self.current_element["notice"] = p[2] @grammar_rule("file_notice : FILE_NOTICE error") @@ -329,6 +338,7 @@ def p_file_notice_2(self, p): @grammar_rule("file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none") def p_file_cr_text_1(self, p): + self.check_that_current_element_matches_class_for_value(File) self.current_element["copyright_text"] = p[2] @grammar_rule("file_cr_text : FILE_CR_TEXT error") @@ -338,6 +348,7 @@ def p_file_cr_text_2(self, p): @grammar_rule("file_lics_comment : FILE_LICS_COMMENT text_or_line") def p_file_lics_comment_1(self, p): + self.check_that_current_element_matches_class_for_value(File) self.current_element["license_comment"] = p[2] @grammar_rule("file_lics_comment : FILE_LICS_COMMENT error") @@ -348,6 +359,7 @@ def p_file_lics_comment_2(self, p): @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line") def p_file_attribution_text_1(self, p): + self.check_that_current_element_matches_class_for_value(File) self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT error") @@ -357,6 +369,7 @@ def p_file_attribution_text_2(self, p): @grammar_rule("file_lics_info : FILE_LICS_INFO license_or_no_assertion_or_none") def p_file_lics_info_1(self, p): + self.check_that_current_element_matches_class_for_value(File) if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_in_file"] = p[2] return @@ -369,6 +382,7 @@ def p_file_lics_info_2(self, p): @grammar_rule("file_comment : FILE_COMMENT text_or_line") def p_file_comment_1(self, p): + self.check_that_current_element_matches_class_for_value(File) self.current_element["comment"] = p[2] @grammar_rule("file_comment : FILE_COMMENT error") @@ -378,6 +392,7 @@ def p_file_comment_2(self, p): @grammar_rule("file_type : FILE_TYPE file_type_value") def p_file_type_1(self, p): + self.check_that_current_element_matches_class_for_value(File) self.current_element.setdefault("file_type", []).append(FileType[p[2]]) @grammar_rule("file_type : FILE_TYPE error") @@ -385,8 
+400,15 @@ def p_file_type_2(self, p): self.current_element["logger"].append( f"Error while parsing FileType: Token did not match any of the valid values. Line: {p.lineno(1)}") + @grammar_rule( + "file_type_value : SOURCE\n| BINARY\n| ARCHIVE\n | APPLICATION\n | AUDIO\n | IMAGE\n | FILETYPE_TEXT\n| VIDEO\n" + " | DOCUMENTATION\n| SPDX \n| OTHER ") + def p_file_type_value(self, p): + p[0] = p[1] + @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") def p_file_checksum_1(self, p): + self.check_that_current_element_matches_class_for_value(File) checksum = parse_checksum(self.current_element["logger"], p[2]) self.current_element.setdefault("checksums", []).append(checksum) @@ -397,6 +419,7 @@ def p_file_checksum_2(self, p): @grammar_rule("file_conc : FILE_LICS_CONC license_or_no_assertion_or_none") def p_file_conc_1(self, p): + self.check_that_current_element_matches_class_for_value(File) self.current_element["license_concluded"] = p[2] @grammar_rule("file_conc : FILE_LICS_CONC error") @@ -405,12 +428,6 @@ def p_file_conc_2(self, p): f"Error while parsing LicenseConcluded in file: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") - @grammar_rule( - "file_type_value : SOURCE\n| BINARY\n| ARCHIVE\n | APPLICATION\n | AUDIO\n | IMAGE\n | FILETYPE_TEXT\n| VIDEO\n" - " | DOCUMENTATION\n| SPDX \n| OTHER ") - def p_file_type_value(self, p): - p[0] = p[1] - # parsing methods for package @grammar_rule("package_name : PKG_NAME LINE") @@ -428,6 +445,7 @@ def p_package_name_1(self, p): @grammar_rule("pkg_desc : PKG_DESC text_or_line") def p_pkg_desc_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["description"] = p[2] @grammar_rule("pkg_desc : PKG_DESC error") @@ -437,6 +455,7 @@ def p_pkg_desc_2(self, p): @grammar_rule("pkg_comment : PKG_COMMENT text_or_line") def p_pkg_comment_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["comment"] = p[2] @grammar_rule("pkg_comment : PKG_COMMENT error") @@ -446,6 +465,7 @@ def p_pkg_comment_2(self, p): @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line") def p_pkg_attribution_text_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT error") @@ -456,6 +476,7 @@ def p_pkg_attribution_text_2(self, p): @grammar_rule("pkg_summary : PKG_SUM text_or_line") def p_pkg_summary_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["summary"] = p[2] @grammar_rule("pkg_summary : PKG_SUM error") @@ -465,6 +486,7 @@ def p_pkg_summary_2(self, p): @grammar_rule("pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none") def p_pkg_cr_text_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["copyright_text"] = p[2] @grammar_rule("pkg_cr_text : PKG_CPY_TEXT error") @@ -475,6 +497,7 @@ def p_pkg_cr_text_2(self, p): @grammar_rule("pkg_ext_ref : PKG_EXT_REF LINE PKG_EXT_REF_COMMENT text_or_line\n | PKG_EXT_REF LINE") def p_pkg_ext_refs_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) category, reference_type, locator = p[2].split(" ") comment = None if len(p) == 5: @@ -503,6 +526,7 @@ def p_pkg_ext_refs_2(self, p): @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT text_or_line") def p_pkg_lic_comment_1(self, p): + 
self.check_that_current_element_matches_class_for_value(Package) self.current_element["license_comment"] = p[2] @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT error") @@ -513,6 +537,7 @@ def p_pkg_lic_comment_2(self, p): @grammar_rule("pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none") def p_pkg_lic_decl_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["license_declared"] = p[2] @grammar_rule("pkg_lic_decl : PKG_LICS_DECL error") @@ -523,6 +548,7 @@ def p_pkg_lic_decl_2(self, p): @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE license_or_no_assertion_or_none") def p_pkg_lic_ff_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_from_files"] = p[2] else: @@ -536,6 +562,7 @@ def p_pkg_lic_ff_error(self, p): @grammar_rule("pkg_lic_conc : PKG_LICS_CONC license_or_no_assertion_or_none") def p_pkg_lic_conc_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["license_concluded"] = p[2] @grammar_rule("pkg_lic_conc : PKG_LICS_CONC error") @@ -546,6 +573,7 @@ def p_pkg_lic_conc_2(self, p): @grammar_rule("pkg_src_info : PKG_SRC_INFO text_or_line") def p_pkg_src_info_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["source_info"] = p[2] @grammar_rule("pkg_src_info : PKG_SRC_INFO error") @@ -555,6 +583,7 @@ def p_pkg_src_info_2(self, p): @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") def p_pkg_checksum_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) checksum = parse_checksum(self.current_element["logger"], p[2]) self.current_element.setdefault("checksums", []).append(checksum) @@ -565,6 +594,7 @@ def p_pkg_checksum_2(self, p): @grammar_rule("pkg_verif : PKG_VERF_CODE LINE") def p_pkg_verif_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) verif_code_regex = re.compile(r"([0-9a-f]+)\s*(\(excludes:\s*(.+)\))?", re.UNICODE) verif_code_code_grp = 1 verif_code_exc_files_grp = 3 @@ -583,6 +613,7 @@ def p_pkg_verif_2(self, p): @grammar_rule("pkg_home : PKG_HOME line_or_no_assertion_or_none") def p_pkg_home_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["homepage"] = p[2] @grammar_rule("pkg_home : PKG_HOME error") @@ -592,6 +623,7 @@ def p_pkg_home_2(self, p): @grammar_rule("pkg_down_location : PKG_DOWN line_or_no_assertion_or_none") def p_pkg_down_location_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["download_location"] = p[2] @grammar_rule("pkg_down_location : PKG_DOWN error") @@ -602,6 +634,7 @@ def p_pkg_down_location_2(self, p): @grammar_rule("pkg_files_analyzed : PKG_FILES_ANALYZED LINE") def p_pkg_files_analyzed_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) if p[2] in ['false', 'False']: self.current_element["files_analyzed"] = False if p[2] in ['true', 'True']: @@ -615,6 +648,7 @@ def p_pkg_files_analyzed_2(self, p): @grammar_rule("pkg_orig : PKG_ORIG pkg_supplier_values") def p_pkg_orig_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["originator"] = p[2] @grammar_rule("pkg_orig : PKG_ORIG error") @@ -624,6 +658,7 @@ def p_pkg_orig_2(self, p): @grammar_rule("pkg_supplier : PKG_SUPPL pkg_supplier_values") def p_pkg_supplier_1(self, p): + 
self.check_that_current_element_matches_class_for_value(Package) self.current_element["supplier"] = p[2] @grammar_rule("pkg_supplier : PKG_SUPPL error") @@ -641,6 +676,7 @@ def p_pkg_supplier_values_2(self, p): @grammar_rule("pkg_file_name : PKG_FILE_NAME LINE") def p_pkg_file_name(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["file_name"] = p[2] @grammar_rule("pkg_file_name : PKG_FILE_NAME error") @@ -650,6 +686,7 @@ def p_pkg_file_name_1(self, p): @grammar_rule("package_version : PKG_VERSION LINE") def p_package_version_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["version"] = p[2] @grammar_rule("package_version : PKG_VERSION error") @@ -659,7 +696,7 @@ def p_package_version_2(self, p): @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE primary_package_purpose_value") def p_primary_package_purpose_1(self, p): - + self.check_that_current_element_matches_class_for_value(Package) self.current_element["primary_package_purpose"] = PackagePurpose[p[2].replace("-", "_")] @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error") @@ -675,6 +712,7 @@ def p_primary_package_purpose_value(self, p): @grammar_rule("built_date : BUILT_DATE DATE") def p_built_date_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["built_date"] = datetime_from_str(p[2]) @grammar_rule("built_date : BUILT_DATE error") @@ -684,6 +722,7 @@ def p_built_date_2(self, p): @grammar_rule("release_date : RELEASE_DATE DATE") def p_release_date_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["release_date"] = datetime_from_str(p[2]) @grammar_rule("release_date : RELEASE_DATE error") @@ -693,6 +732,7 @@ def p_release_date_2(self, p): @grammar_rule("valid_until_date : VALID_UNTIL_DATE DATE") def p_valid_until_date_1(self, p): + self.check_that_current_element_matches_class_for_value(Package) self.current_element["valid_until_date"] = datetime_from_str(p[2]) @grammar_rule("valid_until_date : VALID_UNTIL_DATE error") @@ -714,6 +754,7 @@ def p_snip_spdx_id_1(self, p): @grammar_rule("snip_name : SNIPPET_NAME LINE") def p_snippet_name(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) self.current_element["name"] = p[2] @grammar_rule("snip_name : SNIPPET_NAME error") @@ -723,6 +764,7 @@ def p_snippet_name_1(self, p): @grammar_rule("snip_comment : SNIPPET_COMMENT text_or_line") def p_snippet_comment(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) self.current_element["comment"] = p[2] @grammar_rule("snip_comment : SNIPPET_COMMENT error") @@ -732,6 +774,7 @@ def p_snippet_comment_1(self, p): @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line") def p_snippet_attribution_text_1(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error") @@ -742,6 +785,7 @@ def p_snippet_attribution_text_2(self, p): @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none") def p_snippet_cr_text(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) self.current_element["copyright_text"] = p[2] @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT error") @@ -752,6 +796,7 @@ def p_snippet_cr_text_1(self, p): @grammar_rule("snip_lic_comment : 
SNIPPET_LICS_COMMENT text_or_line") def p_snippet_lic_comment(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) self.current_element["license_comment"] = p[2] @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT error") @@ -762,6 +807,7 @@ def p_snippet_lic_comment_1(self, p): @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID LINE") def p_snip_from_file_spdxid(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) self.current_element["file_spdx_id"] = p[2] @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID error") @@ -772,6 +818,7 @@ def p_snip_from_file_spdxid_1(self, p): @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC license_or_no_assertion_or_none") def p_snippet_concluded_license(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) self.current_element["license_concluded"] = p[2] @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC error") @@ -782,6 +829,7 @@ def p_snippet_concluded_license_1(self, p): @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO license_or_no_assertion_or_none") def p_snippet_lics_info(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_in_snippet"] = p[2] else: @@ -796,6 +844,7 @@ def p_snippet_lics_info_1(self, p): @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE") def p_snippet_byte_range(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) if not range_re.match(p[2].strip()): self.current_element["logger"].append("Value for SnippetByteRange doesn't match valid range pattern.") @@ -812,6 +861,7 @@ def p_snippet_byte_range_1(self, p): @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE LINE") def p_snippet_line_range(self, p): + self.check_that_current_element_matches_class_for_value(Snippet) range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) if not range_re.match(p[2].strip()): self.current_element["logger"].append("Value for SnippetLineRange doesn't match valid range pattern.") @@ -842,6 +892,7 @@ def p_annotator_2(self, p): @grammar_rule("annotation_date : ANNOTATION_DATE DATE") def p_annotation_date_1(self, p): + self.check_that_current_element_matches_class_for_value(Annotation) self.current_element["annotation_date"] = datetime_from_str(p[2]) @grammar_rule("annotation_date : ANNOTATION_DATE error") @@ -851,6 +902,7 @@ def p_annotation_date_2(self, p): @grammar_rule("annotation_comment : ANNOTATION_COMMENT text_or_line") def p_annotation_comment_1(self, p): + self.check_that_current_element_matches_class_for_value(Annotation) self.current_element["annotation_comment"] = p[2] @grammar_rule("annotation_comment : ANNOTATION_COMMENT error") @@ -860,6 +912,7 @@ def p_annotation_comment_2(self, p): @grammar_rule("annotation_type : ANNOTATION_TYPE annotation_type_value") def p_annotation_type_1(self, p): + self.check_that_current_element_matches_class_for_value(Annotation) self.current_element["annotation_type"] = AnnotationType[p[2]] @grammar_rule("annotation_type : ANNOTATION_TYPE error") @@ -959,9 +1012,14 @@ def construct_current_element(self): self.current_element = {"logger": Logger()} def check_that_current_element_matches_class_for_value(self, expected_class): - if expected_class != self.current_element["class"]: - raise SPDXParsingError(["Unexpected current element for value"]) - # what to do now? 
exit parsing + if "class" not in self.current_element: + self.logger.append( + f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " + f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing.") + elif expected_class != self.current_element["class"]: + self.logger.append( + f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " + f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing.") def initialize_new_current_element(self, class_name: Any): if "class" in self.current_element and "spdx_id" in self.current_element: @@ -977,7 +1035,3 @@ def check_for_preceding_package_and_build_contains_relationship(self): relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id) if relationship not in self.elements_build["relationships"]: self.elements_build.setdefault("relationships", []).append(relationship) - - -CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", - Package="packages", ExtractedLicensingInfo="extracted_licensing_info") From ae635c3a1c6e46709175688e313b9f3ed6441e00 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 09:33:47 +0100 Subject: [PATCH 20/43] [issue-382] add negative test for annotation_parser Signed-off-by: Meret Behrens --- .../parser/tagvalue/test_annotation_parser.py | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/spdx/parser/tagvalue/test_annotation_parser.py b/tests/spdx/parser/tagvalue/test_annotation_parser.py index b40cd6575..e0b7b515d 100644 --- a/tests/spdx/parser/tagvalue/test_annotation_parser.py +++ b/tests/spdx/parser/tagvalue/test_annotation_parser.py @@ -10,12 +10,15 @@ # limitations under the License. from datetime import datetime +import pytest + from spdx.model.annotation import AnnotationType +from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -def test_annotation(): +def test_parse_annotation(): parser = Parser() annotation_str = '\n'.join([ 'Annotator: Person: Jane Doe()', @@ -33,3 +36,27 @@ def test_annotation(): assert annotation.annotation_comment == 'Document level annotation' assert annotation.annotation_type == AnnotationType.OTHER assert annotation.spdx_id == 'SPDXRef-DOCUMENT' + + +@pytest.mark.parametrize("annotation_str, expected_message", [ + ('Annotator: Person: Jane Doe()', [['Error while constructing Annotation: Annotation.__init__() missing 4 ' + "required positional arguments: 'spdx_id', 'annotation_type', " + "'annotation_date', and 'annotation_comment'"]]), + ('Annotator: Person: Jane Doe()\nAnnotationType: SOURCE\nAnnotationDate: 201001-2912:23', + [["Error while parsing Annotation: ['Error while parsing AnnotationType: Token " + "did not match specified grammar rule. Line: 2', 'Error while parsing " + "AnnotationDate: Token did not match specified grammar rule. Line: 3']"]]), + ('Annotator: Jane Doe()\nAnnotationDate: 201001-29T18:30:22Z\n' + 'AnnotationComment: Document level annotation\nAnnotationType: OTHER\nSPDXREF: SPDXRef-DOCUMENT', + [["Error while parsing Annotation: ['Error while parsing Annotator: Token did " + "not match specified grammar rule. Line: 1', 'Error while parsing " + "AnnotationDate: Token did not match specified grammar rule. 
Line: 2']"]]), + ('Annotator: Person: ()', [["Error while parsing Annotation: [['No name for Person provided: Person: ().']]"]]), + ('AnnotationType: REVIEW', ['Element Annotation is not the current element in scope, probably the ' + 'expected tag to start the element (Annotator) is missing.'])]) +def test_parse_invalid_annotation(annotation_str, expected_message): + parser = Parser() + with pytest.raises(SPDXParsingError) as err: + parser.parse(annotation_str) + + assert err.value.get_messages() == expected_message From d186f4841775c8f43dd6b8502a56a50eb6603500 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 11:25:19 +0100 Subject: [PATCH 21/43] [issue-382] add negative tests for extracted_licensing_info Signed-off-by: Meret Behrens --- .../test_extracted_licensing_info_parser.py | 38 +++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py index 7761a202e..753577cb2 100644 --- a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py @@ -8,6 +8,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from unittest import TestCase + +import pytest + +from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR @@ -16,9 +21,12 @@ def test_extracted_licensing_info(): parser = Parser() extracted_licensing_info_str = '\n'.join([ 'LicenseID: LicenseRef-Beerware-4.2', - 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you ' + 'retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this ' + 'stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' 'LicenseName: Beer-Ware License (Version 42)', 'LicenseCrossReference: http://people.freebsd.org/~phk/', + 'LicenseCrossReference: http://another.cross.reference/', 'LicenseComment: The beerware license has a couple of other standard variants.' ]) document = parser.parse("\n".join([DOCUMENT_STR, extracted_licensing_info_str])) @@ -26,7 +34,31 @@ def test_extracted_licensing_info(): assert len(document.extracted_licensing_info) == 1 extracted_licensing_info = document.extracted_licensing_info[0] assert extracted_licensing_info.license_id == "LicenseRef-Beerware-4.2" - assert extracted_licensing_info.extracted_text == '"THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + assert extracted_licensing_info.extracted_text == '"THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. ' \ + 'As long as you retain this notice you can do whatever you want with this stuff. 
' \ + 'If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' assert extracted_licensing_info.license_name == "Beer-Ware License (Version 42)" - assert extracted_licensing_info.cross_references == ["http://people.freebsd.org/~phk/"] + TestCase().assertCountEqual(extracted_licensing_info.cross_references, + ["http://people.freebsd.org/~phk/", "http://another.cross.reference/"]) assert extracted_licensing_info.comment == "The beerware license has a couple of other standard variants." + + +def test_parse_invalid_licensing_info(): + parser = Parser() + extracted_licensing_info_str = '\n'.join([ + 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + 'LicenseName: Beer-Ware License (Version 42)', + 'LicenseCrossReference: http://people.freebsd.org/~phk/', + 'LicenseComment: The beerware license has a couple of other standard variants.']) + + with pytest.raises(SPDXParsingError) as err: + parser.parse(extracted_licensing_info_str) + + assert err.value.get_messages() == ['Element ExtractedLicensingInfo is not the current element in scope, probably ' + 'the expected tag to start the element (LicenseID) is missing.', + 'Element ExtractedLicensingInfo is not the current element in scope, probably ' + 'the expected tag to start the element (LicenseID) is missing.', + 'Element ExtractedLicensingInfo is not the current element in scope, probably ' + 'the expected tag to start the element (LicenseID) is missing.', + 'Element ExtractedLicensingInfo is not the current element in scope, probably ' + 'the expected tag to start the element (LicenseID) is missing.'] From d89a2c8b42d86ad5b3c32a4b04f28989dfea1f49 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 11:55:13 +0100 Subject: [PATCH 22/43] [issue-382] rename parsing methods Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 283 ++++++++++---------- 1 file changed, 141 insertions(+), 142 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 78caa36ac..6a905e6dd 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -65,11 +65,11 @@ def __init__(self, **kwargs): self.yacc = yacc.yacc(module=self, **kwargs) @grammar_rule("start : start attrib ") - def p_start_1(self, p): + def p_start_start_attrib(self, p): pass @grammar_rule("start : attrib ") - def p_start_2(self, p): + def p_start_attrib(self, p): pass @grammar_rule("attrib : spdx_version\n| spdx_id\n| data_lics\n| doc_name\n| doc_comment\n| doc_namespace\n| " @@ -103,19 +103,19 @@ def p_unknown_tag(self, p): self.logger.append(f"Unknown tag provided in line {p.lineno(1)}") @grammar_rule("text_or_line : TEXT") - def p_text_or_line_value_1(self, p): + def p_text_or_line_value(self, p): p[0] = str_from_text(p[1]) @grammar_rule("text_or_line : LINE") - def p_text_or_line_value_2(self, p): + def p_text_or_line_value_error(self, p): p[0] = p[1] @grammar_rule("license_or_no_assertion_or_none : NO_ASSERTION") - def p_license_or_no_assertion_or_none_1(self, p): + def p_license_or_no_assertion_or_none(self, p): p[0] = SpdxNoAssertion() @grammar_rule("license_or_no_assertion_or_none : NONE") - def p_license_or_no_assertion_or_none_2(self, p): + def 
p_license_or_no_assertion_or_none_error(self, p): p[0] = SpdxNone() @grammar_rule("license_or_no_assertion_or_none : LINE") @@ -123,23 +123,23 @@ def p_license_or_no_assertion_or_none_3(self, p): p[0] = get_spdx_licensing().parse(p[1]) @grammar_rule("line_or_no_assertion : LINE") - def p_line_or_no_assertion_1(self, p): + def p_line_or_no_assertion(self, p): p[0] = p[1] @grammar_rule("line_or_no_assertion : NO_ASSERTION") - def p_line_or_no_assertion_2(self, p): + def p_line_or_no_assertion_error(self, p): p[0] = SpdxNoAssertion() @grammar_rule("line_or_no_assertion_or_none : text_or_line") - def p_line_1(self, p): + def p_line(self, p): p[0] = p[1] @grammar_rule("line_or_no_assertion_or_none : NO_ASSERTION") - def p_no_assertion_2(self, p): + def p_no_assertion_error(self, p): p[0] = SpdxNoAssertion() @grammar_rule("line_or_no_assertion_or_none : NONE") - def p_none_2(self, p): + def p_none_error(self, p): p[0] = SpdxNoAssertion() @grammar_rule("spdx_id : SPDX_ID LINE") @@ -154,221 +154,221 @@ def p_spdx_id(self, p): # parsing methods for creation info / document level @grammar_rule("lics_list_ver : LIC_LIST_VER LINE") - def p_lics_list_ver_1(self, p): + def p_license_list_version(self, p): try: self.creation_info["license_list_version"] = Version.from_string(p[2]) except ValueError as err: self.creation_info["logger"].append(err.args[0]) @grammar_rule("lics_list_ver : LIC_LIST_VER error") - def p_lics_list_ver_2(self, p): + def p_license_list_version_error(self, p): self.creation_info["logger"].append( f"Error while parsing LicenseListVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("doc_comment : DOC_COMMENT text_or_line") - def p_doc_comment_1(self, p): + def p_doc_comment(self, p): self.creation_info["document_comment"] = p[2] @grammar_rule("doc_comment : DOC_COMMENT error") - def p_doc_comment_2(self, p): + def p_doc_comment_error(self, p): self.creation_info["logger"].append( f"Error while parsing DocumentComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("doc_namespace : DOC_NAMESPACE LINE") - def p_doc_namespace_1(self, p): + def p_doc_namespace(self, p): self.creation_info["document_namespace"] = p[2] @grammar_rule("doc_namespace : DOC_NAMESPACE error") - def p_doc_namespace_2(self, p): + def p_doc_namespace_error(self, p): self.creation_info["logger"].append( f"Error while parsing DocumentNamespace: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("data_lics : DOC_LICENSE LINE") - def p_data_license_1(self, p): + def p_data_license(self, p): self.creation_info["data_license"] = p[2] @grammar_rule("data_lics : DOC_LICENSE error") - def p_data_license_2(self, p): + def p_data_license_error(self, p): self.creation_info["logger"].append( f"Error while parsing DataLicense: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("doc_name : DOC_NAME LINE") - def p_doc_name_1(self, p): + def p_doc_name(self, p): self.creation_info["name"] = p[2] @grammar_rule("doc_name : DOC_NAME error") - def p_doc_name_2(self, p): + def p_doc_name_error(self, p): self.creation_info["logger"].append( f"Error while parsing DocumentName: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("ext_doc_ref : EXT_DOC_REF DOC_REF_ID DOC_URI EXT_DOC_REF_CHECKSUM") - def p_ext_doc_refs_1(self, p): + def p_external_document_ref(self, p): document_ref_id = p[2] document_uri = p[3] - checksum = parse_checksum(self.current_element["logger"], p[4]) + checksum = parse_checksum(self.creation_info["logger"], p[4]) external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum) self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) @grammar_rule("ext_doc_ref : EXT_DOC_REF error") - def p_ext_doc_refs_2(self, p): + def p_external_document_ref_error(self, p): self.creation_info["logger"].append( f"Error while parsing ExternalDocumentRef: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("spdx_version : DOC_VERSION LINE") - def p_spdx_version_1(self, p): + def p_spdx_version(self, p): self.creation_info["spdx_version"] = p[2] @grammar_rule("spdx_version : DOC_VERSION error") - def p_spdx_version_2(self, p): + def p_spdx_version_error(self, p): self.creation_info["logger"].append( f"Error while parsing SPDXVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("creator_comment : CREATOR_COMMENT text_or_line") - def p_creator_comment_1(self, p): + def p_creator_comment(self, p): self.creation_info["creator_comment"] = p[2] @grammar_rule("creator_comment : CREATOR_COMMENT error") - def p_creator_comment_2(self, p): + def p_creator_comment_error(self, p): self.creation_info["logger"].append( f"Error while parsing CreatorComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - def p_creator_1(self, p): + def p_creator(self, p): """creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORG_VALUE""" self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2])) @grammar_rule("creator : CREATOR error") - def p_creator_2(self, p): + def p_creator_error(self, p): self.creation_info["logger"].append( f"Error while parsing Creator: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("created : CREATED DATE") - def p_created_1(self, p): + def p_created(self, p): self.creation_info["created"] = datetime_from_str(p[2]) @grammar_rule("created : CREATED error") - def p_created_2(self, p): + def p_created_error(self, p): self.creation_info["logger"].append( f"Error while parsing Created: Token did not match specified grammar rule. Line: {p.lineno(1)}") # parsing methods for extracted licensing info @grammar_rule("extr_lic_id : LICS_ID LINE") - def p_extr_lic_id_1(self, p): + def p_extracted_license_id(self, p): self.initialize_new_current_element(ExtractedLicensingInfo) self.current_element["license_id"] = p[2] @grammar_rule("extr_lic_id : LICS_ID error") - def p_extr_lic_id_2(self, p): + def p_extracted_license_id_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseID: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("lic_xref : LICS_CRS_REF LINE") - def p_lic_xref_1(self, p): + def p_extracted_cross_reference(self, p): self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) self.current_element.setdefault("cross_references", []).append(p[2]) @grammar_rule("lic_xref : LICS_CRS_REF error") - def p_lic_xref_2(self, p): + def p_extracted_cross_reference_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseCrossReference: Token did not match specified grammar rule. 
" f"Line: {p.lineno(1)}") @grammar_rule("lic_comment : LICS_COMMENT text_or_line") - def p_lic_comment_1(self, p): + def p_license_comment(self, p): self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) self.current_element["comment"] = p[2] @grammar_rule("lic_comment : LICS_COMMENT error") - def p_lic_comment_2(self, p): + def p_license_comment_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("extr_lic_name : LICS_NAME line_or_no_assertion") - def p_extr_lic_name_1(self, p): + def p_extracted_license_name(self, p): self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) self.current_element["license_name"] = p[2] @grammar_rule("extr_lic_name : LICS_NAME error") - def p_extr_lic_name_2(self, p): + def p_extracted_license_name_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseName: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("extr_lic_text : LICS_TEXT text_or_line") - def p_extr_lic_text_1(self, p): + def p_extracted_license_text(self, p): self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) self.current_element["extracted_text"] = p[2] @grammar_rule("extr_lic_text : LICS_TEXT error") - def p_extr_lic_text_2(self, p): + def p_extracted_license_text_error(self, p): self.current_element["logger"].append( f"Error while parsing ExtractedText: Token did not match specified grammar rule. Line: {p.lineno(1)}") # parsing methods for file @grammar_rule("file_name : FILE_NAME LINE") - def p_file_name_1(self, p): + def p_file_name(self, p): self.initialize_new_current_element(File) self.current_element["name"] = p[2] @grammar_rule("file_name : FILE_NAME error") - def p_file_name_2(self, p): + def p_file_name_error(self, p): self.initialize_new_current_element(File) self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_contrib : FILE_CONTRIB LINE") - def p_file_contrib_1(self, p): + def p_file_contributor(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element.setdefault("contributors", []).append(p[2]) @grammar_rule("file_contrib : FILE_CONTRIB error") - def p_file_contrib_2(self, p): + def p_file_contributor_error(self, p): self.current_element["logger"].append( f"Error while parsing FileContributor: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_notice : FILE_NOTICE text_or_line") - def p_file_notice_1(self, p): + def p_file_notice(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element["notice"] = p[2] @grammar_rule("file_notice : FILE_NOTICE error") - def p_file_notice_2(self, p): + def p_file_notice_error(self, p): self.current_element["logger"].append( f"Error while parsing FileNotice: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none") - def p_file_cr_text_1(self, p): + def p_file_copyright_text(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element["copyright_text"] = p[2] @grammar_rule("file_cr_text : FILE_CR_TEXT error") - def p_file_cr_text_2(self, p): + def p_file_copyright_text_error(self, p): self.current_element["logger"].append( f"Error while parsing FileCopyrightText: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_lics_comment : FILE_LICS_COMMENT text_or_line") - def p_file_lics_comment_1(self, p): + def p_file_license_comment(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element["license_comment"] = p[2] @grammar_rule("file_lics_comment : FILE_LICS_COMMENT error") - def p_file_lics_comment_2(self, p): + def p_file_license_comment_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseComments in file: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line") - def p_file_attribution_text_1(self, p): + def p_file_attribution_text(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT error") - def p_file_attribution_text_2(self, p): + def p_file_attribution_text_error(self, p): self.current_element["logger"].append( f"Error while parsing FileAttributionText: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_lics_info : FILE_LICS_INFO license_or_no_assertion_or_none") - def p_file_lics_info_1(self, p): + def p_file_license_info(self, p): self.check_that_current_element_matches_class_for_value(File) if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_in_file"] = p[2] @@ -376,27 +376,27 @@ def p_file_lics_info_1(self, p): self.current_element.setdefault("license_info_in_file", []).append(p[2]) @grammar_rule("file_lics_info : FILE_LICS_INFO error") - def p_file_lics_info_2(self, p): + def p_file_license_info_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseInfoInFile: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_comment : FILE_COMMENT text_or_line") - def p_file_comment_1(self, p): + def p_file_comment(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element["comment"] = p[2] @grammar_rule("file_comment : FILE_COMMENT error") - def p_file_comment_2(self, p): + def p_file_comment_error(self, p): self.current_element["logger"].append( f"Error while parsing FileComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_type : FILE_TYPE file_type_value") - def p_file_type_1(self, p): + def p_file_type(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element.setdefault("file_type", []).append(FileType[p[2]]) @grammar_rule("file_type : FILE_TYPE error") - def p_file_type_2(self, p): + def p_file_type_error(self, p): self.current_element["logger"].append( f"Error while parsing FileType: Token did not match any of the valid values. 
Line: {p.lineno(1)}") @@ -407,23 +407,23 @@ def p_file_type_value(self, p): p[0] = p[1] @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") - def p_file_checksum_1(self, p): + def p_file_checksum(self, p): self.check_that_current_element_matches_class_for_value(File) checksum = parse_checksum(self.current_element["logger"], p[2]) self.current_element.setdefault("checksums", []).append(checksum) @grammar_rule("file_checksum : FILE_CHECKSUM error") - def p_file_checksum_2(self, p): + def p_file_checksum_error(self, p): self.current_element["logger"].append( f"Error while parsing Checksum in file: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_conc : FILE_LICS_CONC license_or_no_assertion_or_none") - def p_file_conc_1(self, p): + def p_file_license_concluded(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element["license_concluded"] = p[2] @grammar_rule("file_conc : FILE_LICS_CONC error") - def p_file_conc_2(self, p): + def p_file_license_concluded_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseConcluded in file: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @@ -436,7 +436,7 @@ def p_package_name(self, p): self.current_element["name"] = p[2] @grammar_rule("package_name : PKG_NAME error") - def p_package_name_1(self, p): + def p_package_name_error(self, p): self.initialize_new_current_element(Package) self.construct_current_element() self.current_element["class"] = Package @@ -444,59 +444,59 @@ def p_package_name_1(self, p): f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_desc : PKG_DESC text_or_line") - def p_pkg_desc_1(self, p): + def p_pkg_description(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["description"] = p[2] @grammar_rule("pkg_desc : PKG_DESC error") - def p_pkg_desc_2(self, p): + def p_pkg_description_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageDescription: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_comment : PKG_COMMENT text_or_line") - def p_pkg_comment_1(self, p): + def p_pkg_comment(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["comment"] = p[2] @grammar_rule("pkg_comment : PKG_COMMENT error") - def p_pkg_comment_2(self, p): + def p_pkg_comment_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line") - def p_pkg_attribution_text_1(self, p): + def p_pkg_attribution_text(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT error") - def p_pkg_attribution_text_2(self, p): + def p_pkg_attribution_text_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageAttributionText: Token did not match specified grammar rule. 
" f"Line: {p.lineno(1)}") @grammar_rule("pkg_summary : PKG_SUM text_or_line") - def p_pkg_summary_1(self, p): + def p_pkg_summary(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["summary"] = p[2] @grammar_rule("pkg_summary : PKG_SUM error") - def p_pkg_summary_2(self, p): + def p_pkg_summary_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageSummary: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none") - def p_pkg_cr_text_1(self, p): + def p_pkg_copyright_text(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["copyright_text"] = p[2] @grammar_rule("pkg_cr_text : PKG_CPY_TEXT error") - def p_pkg_cr_text_2(self, p): + def p_pkg_copyright_text_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageCopyrightText: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("pkg_ext_ref : PKG_EXT_REF LINE PKG_EXT_REF_COMMENT text_or_line\n | PKG_EXT_REF LINE") - def p_pkg_ext_refs_1(self, p): + def p_pkg_external_refs(self, p): self.check_that_current_element_matches_class_for_value(Package) category, reference_type, locator = p[2].split(" ") comment = None @@ -519,35 +519,35 @@ def p_pkg_ext_refs_1(self, p): self.current_element.setdefault("external_references", []).append(external_package_ref) @grammar_rule("pkg_ext_ref : PKG_EXT_REF error") - def p_pkg_ext_refs_2(self, p): + def p_pkg_external_refs_error(self, p): self.current_element["logger"].append( f"Error while parsing ExternalRef in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT text_or_line") - def p_pkg_lic_comment_1(self, p): + def p_pkg_license_comment(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["license_comment"] = p[2] @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT error") - def p_pkg_lic_comment_2(self, p): + def p_pkg_license_comment_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageLicenseComments: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none") - def p_pkg_lic_decl_1(self, p): + def p_pkg_license_declared(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["license_declared"] = p[2] @grammar_rule("pkg_lic_decl : PKG_LICS_DECL error") - def p_pkg_lic_decl_2(self, p): + def p_pkg_license_declared_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseDeclared in package: Token did not match specified grammar rule. 
" f"Line: {p.lineno(1)}") @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE license_or_no_assertion_or_none") - def p_pkg_lic_ff_1(self, p): + def p_pkg_license_info_from_file(self, p): self.check_that_current_element_matches_class_for_value(Package) if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_from_files"] = p[2] @@ -555,45 +555,45 @@ def p_pkg_lic_ff_1(self, p): self.current_element.setdefault("license_info_from_files", []).append(p[2]) @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE error") - def p_pkg_lic_ff_error(self, p): + def p_pkg_license_info_from_file_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseInfoFromFiles in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("pkg_lic_conc : PKG_LICS_CONC license_or_no_assertion_or_none") - def p_pkg_lic_conc_1(self, p): + def p_pkg_license_concluded(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["license_concluded"] = p[2] @grammar_rule("pkg_lic_conc : PKG_LICS_CONC error") - def p_pkg_lic_conc_2(self, p): + def p_pkg_license_concluded_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseConcluded in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("pkg_src_info : PKG_SRC_INFO text_or_line") - def p_pkg_src_info_1(self, p): + def p_pkg_source_info(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["source_info"] = p[2] @grammar_rule("pkg_src_info : PKG_SRC_INFO error") - def p_pkg_src_info_2(self, p): + def p_pkg_source_info_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageSourceInfo: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") - def p_pkg_checksum_1(self, p): + def p_pkg_checksum(self, p): self.check_that_current_element_matches_class_for_value(Package) checksum = parse_checksum(self.current_element["logger"], p[2]) self.current_element.setdefault("checksums", []).append(checksum) @grammar_rule("pkg_checksum : PKG_CHECKSUM error") - def p_pkg_checksum_2(self, p): + def p_pkg_checksum_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageChecksum: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_verif : PKG_VERF_CODE LINE") - def p_pkg_verif_1(self, p): + def p_pkg_verification_code(self, p): self.check_that_current_element_matches_class_for_value(Package) verif_code_regex = re.compile(r"([0-9a-f]+)\s*(\(excludes:\s*(.+)\))?", re.UNICODE) verif_code_code_grp = 1 @@ -606,34 +606,34 @@ def p_pkg_verif_1(self, p): self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files) @grammar_rule("pkg_verif : PKG_VERF_CODE error") - def p_pkg_verif_2(self, p): + def p_pkg_verification_code_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageVerificationCode: Token did not match specified grammar rule. 
" f"Line: {p.lineno(1)}") @grammar_rule("pkg_home : PKG_HOME line_or_no_assertion_or_none") - def p_pkg_home_1(self, p): + def p_pkg_homepage(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["homepage"] = p[2] @grammar_rule("pkg_home : PKG_HOME error") - def p_pkg_home_2(self, p): + def p_pkg_homepage_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageHomePage: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_down_location : PKG_DOWN line_or_no_assertion_or_none") - def p_pkg_down_location_1(self, p): + def p_pkg_download_location(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["download_location"] = p[2] @grammar_rule("pkg_down_location : PKG_DOWN error") - def p_pkg_down_location_2(self, p): + def p_pkg_download_location_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageDownloadLocation: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("pkg_files_analyzed : PKG_FILES_ANALYZED LINE") - def p_pkg_files_analyzed_1(self, p): + def p_pkg_files_analyzed(self, p): self.check_that_current_element_matches_class_for_value(Package) if p[2] in ['false', 'False']: self.current_element["files_analyzed"] = False @@ -641,28 +641,28 @@ def p_pkg_files_analyzed_1(self, p): self.current_element["files_analyzed"] = True @grammar_rule("pkg_files_analyzed : PKG_FILES_ANALYZED error") - def p_pkg_files_analyzed_2(self, p): + def p_pkg_files_analyzed_error(self, p): self.current_element["logger"].append( f"Error while parsing FilesAnalyzed in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("pkg_orig : PKG_ORIG pkg_supplier_values") - def p_pkg_orig_1(self, p): + def p_pkg_originator(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["originator"] = p[2] @grammar_rule("pkg_orig : PKG_ORIG error") - def p_pkg_orig_2(self, p): + def p_pkg_originator_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageOriginator: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("pkg_supplier : PKG_SUPPL pkg_supplier_values") - def p_pkg_supplier_1(self, p): + def p_pkg_supplier(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["supplier"] = p[2] @grammar_rule("pkg_supplier : PKG_SUPPL error") - def p_pkg_supplier_2(self, p): + def p_pkg_supplier_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageSupplier: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -680,27 +680,27 @@ def p_pkg_file_name(self, p): self.current_element["file_name"] = p[2] @grammar_rule("pkg_file_name : PKG_FILE_NAME error") - def p_pkg_file_name_1(self, p): + def p_pkg_file_name_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageFileName: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("package_version : PKG_VERSION LINE") - def p_package_version_1(self, p): + def p_package_version(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["version"] = p[2] @grammar_rule("package_version : PKG_VERSION error") - def p_package_version_2(self, p): + def p_package_version_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE primary_package_purpose_value") - def p_primary_package_purpose_1(self, p): + def p_primary_package_purpose(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["primary_package_purpose"] = PackagePurpose[p[2].replace("-", "_")] @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error") - def p_primary_package_purpose_2(self, p): + def p_primary_package_purpose_error(self, p): self.current_element["logger"].append( f"Error while parsing PrimaryPackagePurpose: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @@ -711,43 +711,43 @@ def p_primary_package_purpose_value(self, p): p[0] = p[1] @grammar_rule("built_date : BUILT_DATE DATE") - def p_built_date_1(self, p): + def p_built_date(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["built_date"] = datetime_from_str(p[2]) @grammar_rule("built_date : BUILT_DATE error") - def p_built_date_2(self, p): + def p_built_date_error(self, p): self.current_element["logger"].append( f"Error while parsing BuiltDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("release_date : RELEASE_DATE DATE") - def p_release_date_1(self, p): + def p_release_date(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["release_date"] = datetime_from_str(p[2]) @grammar_rule("release_date : RELEASE_DATE error") - def p_release_date_2(self, p): + def p_release_date_error(self, p): self.current_element["logger"].append( f"Error while parsing ReleaseDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("valid_until_date : VALID_UNTIL_DATE DATE") - def p_valid_until_date_1(self, p): + def p_valid_until_date(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["valid_until_date"] = datetime_from_str(p[2]) @grammar_rule("valid_until_date : VALID_UNTIL_DATE error") - def p_valid_until_date_2(self, p): + def p_valid_until_date_error(self, p): self.current_element["logger"].append( f"Error while parsing ValidUntilDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") # parsing methods for snippet @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID LINE") - def p_snip_spdx_id(self, p): + def p_snippet_spdx_id(self, p): self.initialize_new_current_element(Snippet) self.current_element["spdx_id"] = p[2] @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID error") - def p_snip_spdx_id_1(self, p): + def p_snippet_spdx_id_error(self, p): self.initialize_new_current_element(Snippet) self.current_element["logger"].append( f"Error while parsing SnippetSPDXID: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @@ -758,7 +758,7 @@ def p_snippet_name(self, p): self.current_element["name"] = p[2] @grammar_rule("snip_name : SNIPPET_NAME error") - def p_snippet_name_1(self, p): + def p_snippet_name_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetName: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -768,50 +768,50 @@ def p_snippet_comment(self, p): self.current_element["comment"] = p[2] @grammar_rule("snip_comment : SNIPPET_COMMENT error") - def p_snippet_comment_1(self, p): + def p_snippet_comment_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line") - def p_snippet_attribution_text_1(self, p): + def p_snippet_attribution_text(self, p): self.check_that_current_element_matches_class_for_value(Snippet) self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error") - def p_snippet_attribution_text_2(self, p): + def p_snippet_attribution_text_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetAttributionText: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none") - def p_snippet_cr_text(self, p): + def p_snippet_copyright_text(self, p): self.check_that_current_element_matches_class_for_value(Snippet) self.current_element["copyright_text"] = p[2] @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT error") - def p_snippet_cr_text_1(self, p): + def p_snippet_copyright_text_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetCopyrightText: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT text_or_line") - def p_snippet_lic_comment(self, p): + def p_snippet_license_comment(self, p): self.check_that_current_element_matches_class_for_value(Snippet) self.current_element["license_comment"] = p[2] @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT error") - def p_snippet_lic_comment_1(self, p): + def p_snippet_license_comment_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetLicenseComments: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID LINE") - def p_snip_from_file_spdxid(self, p): + def p_snippet_from_file_spdxid(self, p): self.check_that_current_element_matches_class_for_value(Snippet) self.current_element["file_spdx_id"] = p[2] @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID error") - def p_snip_from_file_spdxid_1(self, p): + def p_snippet_from_file_spdxid_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetFromFileSPDXID: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @@ -822,13 +822,13 @@ def p_snippet_concluded_license(self, p): self.current_element["license_concluded"] = p[2] @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC error") - def p_snippet_concluded_license_1(self, p): + def p_snippet_concluded_license_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetLicenseConcluded: Token did not match specified grammar rule. 
" f"Line: {p.lineno(1)}") @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO license_or_no_assertion_or_none") - def p_snippet_lics_info(self, p): + def p_snippet_license_info(self, p): self.check_that_current_element_matches_class_for_value(Snippet) if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_in_snippet"] = p[2] @@ -836,7 +836,7 @@ def p_snippet_lics_info(self, p): self.current_element.setdefault("license_info_in_snippet", []).append(p[2]) @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO error") - def p_snippet_lics_info_1(self, p): + def p_snippet_license_info_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseInfoInSnippet: Token did not match specified grammar rule. " @@ -854,8 +854,7 @@ def p_snippet_byte_range(self, p): self.current_element["byte_range"] = startpoint, endpoint @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE error") - def p_snippet_byte_range_1(self, p): - + def p_snippet_byte_range_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetByteRange: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -871,12 +870,12 @@ def p_snippet_line_range(self, p): self.current_element["line_range"] = startpoint, endpoint @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE error") - def p_snippet_line_range_1(self, p): + def p_snippet_line_range_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetLineRange: Token did not match specified grammar rule. Line: {p.lineno(1)}") # parsing methods for annotation - def p_annotator_1(self, p): + def p_annotator(self, p): """annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE""" self.initialize_new_current_element(Annotation) try: @@ -885,38 +884,38 @@ def p_annotator_1(self, p): self.current_element["logger"].append(err.get_messages()) @grammar_rule("annotator : ANNOTATOR error") - def p_annotator_2(self, p): + def p_annotator_error(self, p): self.initialize_new_current_element(Annotation) self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("annotation_date : ANNOTATION_DATE DATE") - def p_annotation_date_1(self, p): + def p_annotation_date(self, p): self.check_that_current_element_matches_class_for_value(Annotation) self.current_element["annotation_date"] = datetime_from_str(p[2]) @grammar_rule("annotation_date : ANNOTATION_DATE error") - def p_annotation_date_2(self, p): + def p_annotation_date_error(self, p): self.current_element["logger"].append( f"Error while parsing AnnotationDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("annotation_comment : ANNOTATION_COMMENT text_or_line") - def p_annotation_comment_1(self, p): + def p_annotation_comment(self, p): self.check_that_current_element_matches_class_for_value(Annotation) self.current_element["annotation_comment"] = p[2] @grammar_rule("annotation_comment : ANNOTATION_COMMENT error") - def p_annotation_comment_2(self, p): + def p_annotation_comment_error(self, p): self.current_element["logger"].append( f"Error while parsing AnnotationComment: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("annotation_type : ANNOTATION_TYPE annotation_type_value") - def p_annotation_type_1(self, p): + def p_annotation_type(self, p): self.check_that_current_element_matches_class_for_value(Annotation) self.current_element["annotation_type"] = AnnotationType[p[2]] @grammar_rule("annotation_type : ANNOTATION_TYPE error") - def p_annotation_type_2(self, p): + def p_annotation_type_error(self, p): self.current_element["logger"].append( f"Error while parsing AnnotationType: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -925,11 +924,11 @@ def p_annotation_type_value(self, p): p[0] = p[1] @grammar_rule("annotation_spdx_id : ANNOTATION_SPDX_ID LINE") - def p_annotation_spdx_id_1(self, p): + def p_annotation_spdx_id(self, p): self.current_element["spdx_id"] = p[2] @grammar_rule("annotation_spdx_id : ANNOTATION_SPDX_ID error") - def p_annotation_spdx_id_2(self, p): + def p_annotation_spdx_id_error(self, p): self.current_element["logger"].append( f"Error while parsing SPDXREF in annotation: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @@ -937,7 +936,7 @@ def p_annotation_spdx_id_2(self, p): # parsing methods for relationship @grammar_rule("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP relationship_value") - def p_relationship_1(self, p): + def p_relationship(self, p): self.initialize_new_current_element(Relationship) try: spdx_element_id, relationship_type, related_spdx_element_id = p[2].split(" ") @@ -960,7 +959,7 @@ def p_relationship_1(self, p): self.current_element["comment"] = p[4] @grammar_rule("relationship : RELATIONSHIP error") - def p_relationship_2(self, p): + def p_relationship_error(self, p): self.initialize_new_current_element(Relationship) self.current_element["logger"].append( f"Error while parsing Relationship: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") From 0443b8445af8509137fb0846f650f8af5cea01c5 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 14:41:42 +0100 Subject: [PATCH 23/43] [refactor] merge parsing functions that return the same values Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 53 ++++++--------------- 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 6a905e6dd..2e7e32cd3 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -103,44 +103,29 @@ def p_unknown_tag(self, p): self.logger.append(f"Unknown tag provided in line {p.lineno(1)}") @grammar_rule("text_or_line : TEXT") - def p_text_or_line_value(self, p): + def p_text(self, p): p[0] = str_from_text(p[1]) - @grammar_rule("text_or_line : LINE") - def p_text_or_line_value_error(self, p): + @grammar_rule("text_or_line : LINE\n line_or_no_assertion : LINE\nline_or_no_assertion_or_none : text_or_line") + def p_line(self, p): p[0] = p[1] - @grammar_rule("license_or_no_assertion_or_none : NO_ASSERTION") - def p_license_or_no_assertion_or_none(self, p): + @grammar_rule("license_or_no_assertion_or_none : NO_ASSERTION\n actor_or_no_assertion : NO_ASSERTION\n" + "line_or_no_assertion : NO_ASSERTION\n line_or_no_assertion_or_none : NO_ASSERTION") + def p_no_assertion(self, p): p[0] = SpdxNoAssertion() - @grammar_rule("license_or_no_assertion_or_none : NONE") - def p_license_or_no_assertion_or_none_error(self, p): + @grammar_rule("license_or_no_assertion_or_none : NONE\n line_or_no_assertion_or_none : NONE") + def p_none(self, p): p[0] = SpdxNone() @grammar_rule("license_or_no_assertion_or_none : LINE") - def p_license_or_no_assertion_or_none_3(self, p): + def p_license(self, p): p[0] = get_spdx_licensing().parse(p[1]) - @grammar_rule("line_or_no_assertion : LINE") - def p_line_or_no_assertion(self, p): - p[0] = p[1] - - @grammar_rule("line_or_no_assertion : NO_ASSERTION") - def p_line_or_no_assertion_error(self, p): - p[0] = SpdxNoAssertion() - - @grammar_rule("line_or_no_assertion_or_none : text_or_line") - def p_line(self, p): - p[0] = p[1] - - @grammar_rule("line_or_no_assertion_or_none : NO_ASSERTION") - def p_no_assertion_error(self, p): - p[0] = SpdxNoAssertion() - - @grammar_rule("line_or_no_assertion_or_none : NONE") - def p_none_error(self, p): - p[0] = SpdxNoAssertion() + @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORG_VALUE") + def p_actor_values(self, p): + p[0] = ActorParser.parse_actor(p[1]) @grammar_rule("spdx_id : SPDX_ID LINE") def p_spdx_id(self, p): @@ -438,8 +423,6 @@ def p_package_name(self, p): @grammar_rule("package_name : PKG_NAME error") def p_package_name_error(self, p): self.initialize_new_current_element(Package) - self.construct_current_element() - self.current_element["class"] = Package self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -646,7 +629,7 @@ def p_pkg_files_analyzed_error(self, p): f"Error while parsing FilesAnalyzed in package: Token did not match specified grammar rule. 
" f"Line: {p.lineno(1)}") - @grammar_rule("pkg_orig : PKG_ORIG pkg_supplier_values") + @grammar_rule("pkg_orig : PKG_ORIG actor_or_no_assertion") def p_pkg_originator(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["originator"] = p[2] @@ -656,7 +639,7 @@ def p_pkg_originator_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageOriginator: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("pkg_supplier : PKG_SUPPL pkg_supplier_values") + @grammar_rule("pkg_supplier : PKG_SUPPL actor_or_no_assertion") def p_pkg_supplier(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element["supplier"] = p[2] @@ -666,14 +649,6 @@ def p_pkg_supplier_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageSupplier: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("pkg_supplier_values : NO_ASSERTION") - def p_pkg_supplier_values_1(self, p): - p[0] = SpdxNoAssertion() - - @grammar_rule("pkg_supplier_values : PERSON_VALUE\n | ORG_VALUE\n | TOOL_VALUE") - def p_pkg_supplier_values_2(self, p): - p[0] = ActorParser.parse_actor(p[1]) - @grammar_rule("pkg_file_name : PKG_FILE_NAME LINE") def p_pkg_file_name(self, p): self.check_that_current_element_matches_class_for_value(Package) From e348289a4beea1531122f593b4382547612da499 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 15:34:13 +0100 Subject: [PATCH 24/43] [refactor] creation_info_parser Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 91 +++++++++++-------- .../tagvalue/test_creation_info_parser.py | 5 +- 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 2e7e32cd3..cd9b2eb98 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -11,7 +11,7 @@ # limitations under the License. 
import re -from typing import Any, List, Dict +from typing import Any, List, Dict, Optional, Callable from license_expression import get_spdx_licensing from ply import yacc @@ -72,28 +72,29 @@ def p_start_start_attrib(self, p): def p_start_attrib(self, p): pass - @grammar_rule("attrib : spdx_version\n| spdx_id\n| data_lics\n| doc_name\n| doc_comment\n| doc_namespace\n| " - "creator\n| created\n| creator_comment\n| lics_list_ver\n| ext_doc_ref\n" - # attributes for file - "| file_name\n| file_type\n| file_checksum\n| file_conc\n| file_lics_info\n| file_cr_text\n" - "| file_lics_comment\n| file_attribution_text\n| file_notice\n| file_comment\n| file_contrib\n" - # attributes for annotation - "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n" - # attributes for relationship - "| relationship\n" - # attributes for snippet - "| snip_spdx_id\n| snip_name\n| snip_comment\n| snippet_attribution_text\n| snip_cr_text\n" - "| snip_lic_comment\n| snip_file_spdx_id\n| snip_lics_conc\n| snip_lics_info\n| snip_byte_range\n" - "| snip_line_range\n" - # attributes for package - "| package_name\n| package_version\n| pkg_down_location\n| pkg_files_analyzed\n| pkg_home\n" - "| pkg_summary\n| pkg_src_info\n| pkg_file_name\n| pkg_supplier\n| pkg_orig\n| pkg_checksum\n" - "| pkg_verif\n| pkg_desc\n| pkg_comment\n| pkg_attribution_text\n| pkg_lic_decl\n| pkg_lic_conc\n" - "| pkg_lic_ff\n| pkg_lic_comment\n| pkg_cr_text\n| pkg_ext_ref\n| primary_package_purpose\n" - "| built_date\n| release_date\n| valid_until_date\n" - # attributes for extracted licensing info - "| extr_lic_id\n| extr_lic_text\n| extr_lic_name\n| lic_xref\n| lic_comment\n" - "| unknown_tag ") + @grammar_rule( + "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| " + "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n" + # attributes for file + "| file_name\n| file_type\n| file_checksum\n| file_conc\n| file_lics_info\n| file_cr_text\n" + "| file_lics_comment\n| file_attribution_text\n| file_notice\n| file_comment\n| file_contrib\n" + # attributes for annotation + "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n" + # attributes for relationship + "| relationship\n" + # attributes for snippet + "| snip_spdx_id\n| snip_name\n| snip_comment\n| snippet_attribution_text\n| snip_cr_text\n" + "| snip_lic_comment\n| snip_file_spdx_id\n| snip_lics_conc\n| snip_lics_info\n| snip_byte_range\n" + "| snip_line_range\n" + # attributes for package + "| package_name\n| package_version\n| pkg_down_location\n| pkg_files_analyzed\n| pkg_home\n" + "| pkg_summary\n| pkg_src_info\n| pkg_file_name\n| pkg_supplier\n| pkg_orig\n| pkg_checksum\n" + "| pkg_verif\n| pkg_desc\n| pkg_comment\n| pkg_attribution_text\n| pkg_lic_decl\n| pkg_lic_conc\n" + "| pkg_lic_ff\n| pkg_lic_comment\n| pkg_cr_text\n| pkg_ext_ref\n| primary_package_purpose\n" + "| built_date\n| release_date\n| valid_until_date\n" + # attributes for extracted licensing info + "| extr_lic_id\n| extr_lic_text\n| extr_lic_name\n| lic_xref\n| lic_comment\n" + "| unknown_tag ") def p_attrib(self, p): pass @@ -137,49 +138,61 @@ def p_spdx_id(self, p): self.creation_info["spdx_id"] = p[2] # parsing methods for creation info / document level + def set_creation_info_value(self, parsed_value: Any, argument_name: Optional[str] = None, + method_to_apply: Callable = lambda x: x): + if not argument_name: + argument_name = str(parsed_value.slice[0]) + 
if argument_name in self.creation_info: + self.creation_info["logger"].append( + f"Multiple values for {parsed_value[1]} found. Line: {parsed_value.lineno(1)}") + return + self.creation_info[argument_name] = method_to_apply(parsed_value[2]) - @grammar_rule("lics_list_ver : LIC_LIST_VER LINE") + @grammar_rule("license_list_version : LIC_LIST_VER LINE") def p_license_list_version(self, p): try: - self.creation_info["license_list_version"] = Version.from_string(p[2]) + if str(p.slice[0]) in self.creation_info: + self.creation_info["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") + return + self.creation_info[str(p.slice[0])] = Version.from_string(p[2]) except ValueError as err: self.creation_info["logger"].append(err.args[0]) - @grammar_rule("lics_list_ver : LIC_LIST_VER error") + @grammar_rule("license_list_version : LIC_LIST_VER error") def p_license_list_version_error(self, p): self.creation_info["logger"].append( f"Error while parsing LicenseListVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("doc_comment : DOC_COMMENT text_or_line") + @grammar_rule("document_comment : DOC_COMMENT text_or_line") def p_doc_comment(self, p): - self.creation_info["document_comment"] = p[2] + self.set_creation_info_value(p) - @grammar_rule("doc_comment : DOC_COMMENT error") + @grammar_rule("document_comment : DOC_COMMENT error") def p_doc_comment_error(self, p): self.creation_info["logger"].append( f"Error while parsing DocumentComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("doc_namespace : DOC_NAMESPACE LINE") + @grammar_rule("document_namespace : DOC_NAMESPACE LINE") def p_doc_namespace(self, p): - self.creation_info["document_namespace"] = p[2] + self.set_creation_info_value(p) - @grammar_rule("doc_namespace : DOC_NAMESPACE error") + @grammar_rule("document_namespace : DOC_NAMESPACE error") def p_doc_namespace_error(self, p): self.creation_info["logger"].append( f"Error while parsing DocumentNamespace: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("data_lics : DOC_LICENSE LINE") + @grammar_rule("data_license : DOC_LICENSE LINE") def p_data_license(self, p): - self.creation_info["data_license"] = p[2] + self.set_creation_info_value(p) - @grammar_rule("data_lics : DOC_LICENSE error") + @grammar_rule("data_license : DOC_LICENSE error") def p_data_license_error(self, p): self.creation_info["logger"].append( f"Error while parsing DataLicense: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @grammar_rule("doc_name : DOC_NAME LINE") def p_doc_name(self, p): - self.creation_info["name"] = p[2] + self.set_creation_info_value(p, "name") @grammar_rule("doc_name : DOC_NAME error") def p_doc_name_error(self, p): @@ -201,7 +214,7 @@ def p_external_document_ref_error(self, p): @grammar_rule("spdx_version : DOC_VERSION LINE") def p_spdx_version(self, p): - self.creation_info["spdx_version"] = p[2] + self.set_creation_info_value(p) @grammar_rule("spdx_version : DOC_VERSION error") def p_spdx_version_error(self, p): @@ -210,7 +223,7 @@ def p_spdx_version_error(self, p): @grammar_rule("creator_comment : CREATOR_COMMENT text_or_line") def p_creator_comment(self, p): - self.creation_info["creator_comment"] = p[2] + self.set_creation_info_value(p) @grammar_rule("creator_comment : CREATOR_COMMENT error") def p_creator_comment_error(self, p): @@ -228,7 +241,7 @@ def p_creator_error(self, p): @grammar_rule("created : CREATED DATE") def p_created(self, p): - self.creation_info["created"] = datetime_from_str(p[2]) + self.set_creation_info_value(p, method_to_apply=datetime_from_str) @grammar_rule("created : CREATED error") def p_created_error(self, p): diff --git a/tests/spdx/parser/tagvalue/test_creation_info_parser.py b/tests/spdx/parser/tagvalue/test_creation_info_parser.py index 6d9e4d427..d36e822c6 100644 --- a/tests/spdx/parser/tagvalue/test_creation_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_creation_info_parser.py @@ -81,7 +81,10 @@ def test_creation_info(): 'SPDXID: SPDXRef-DOCUMENT']), ['Error while constructing CreationInfo: CreationInfo.__init__() missing 3 ' "required positional arguments: 'document_namespace', 'creators', and " - "'created'"])])) + "'created'"]), + ('LicenseListVersion: 3.5\nLicenseListVersion: 3.7', + [["Error while parsing CreationInfo: ['Multiple values for LicenseListVersion " + "found. Line: 2']"]])])) def test_invalid_creation_info(document_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: From 3d77094e7cf12c6c97453df05a5ec2aacdebe551 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 16:15:04 +0100 Subject: [PATCH 25/43] [refactor] use helper method to set unique values Signed-off-by: Meret Behrens --- .../parser/tagvalue/parser/helper_methods.py | 19 +- src/spdx/parser/tagvalue/parser/tagvalue.py | 181 ++++++++---------- 2 files changed, 102 insertions(+), 98 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/helper_methods.py b/src/spdx/parser/tagvalue/parser/helper_methods.py index 63690a829..c47e5b5c0 100644 --- a/src/spdx/parser/tagvalue/parser/helper_methods.py +++ b/src/spdx/parser/tagvalue/parser/helper_methods.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
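
A minimal, hypothetical sketch (not part of the patch series) of the mechanism that the grammar_rule decorator appearing just below relies on: ply discovers grammar productions from each p_* handler's docstring, so assigning func.__doc__ lets one decorated handler register one or several rules — which is also what makes the merged handlers of the previous patch possible. MiniParser, its tokens, and grammar_rule_sketch are illustrative names only, assuming ply is installed:

    from ply import lex, yacc


    def grammar_rule_sketch(doc):
        # same trick as the decorator below: ply reads the production from __doc__
        def decorate(func):
            func.__doc__ = doc
            return func
        return decorate


    class MiniParser:
        # hypothetical toy grammar, only to show docstring-driven rule registration
        tokens = ["NO_ASSERTION", "LINE"]
        t_NO_ASSERTION = r"NOASSERTION"
        t_LINE = r"[a-z]+"
        t_ignore = " "

        def t_error(self, t):
            t.lexer.skip(1)

        @grammar_rule_sketch("value : NO_ASSERTION\n| LINE")
        def p_value(self, p):
            # one handler serves both productions, as in the merged rules above
            p[0] = p[1]

        def p_error(self, p):
            pass

        def __init__(self):
            self.lexer = lex.lex(module=self)
            self.parser = yacc.yacc(module=self, debug=False, write_tables=False)


    mini = MiniParser()
    print(mini.parser.parse("NOASSERTION", lexer=mini.lexer))  # -> NOASSERTION
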
import re -from typing import Optional +from typing import Optional, Callable, Any, Dict from spdx.model.checksum import Checksum, ChecksumAlgorithm from spdx.parser.error import SPDXParsingError @@ -22,6 +22,7 @@ def grammar_rule(doc): def decorate(func): func.__doc__ = doc return func + return decorate @@ -51,3 +52,19 @@ def parse_checksum(logger: Logger, checksum_str: str) -> Optional[Checksum]: logger.append(err.get_messages()) checksum = None return checksum + + +def set_value(parsed_value: Any, dict_to_fill: Dict[str, Any], argument_name: Optional[str] = None, + method_to_apply: Callable = lambda x: x): + if not argument_name: + argument_name = str(parsed_value.slice[0]) + if argument_name in dict_to_fill: + dict_to_fill["logger"].append( + f"Multiple values for {parsed_value[1]} found. Line: {parsed_value.lineno(1)}") + return + try: + dict_to_fill[argument_name] = method_to_apply(parsed_value[2]) + except SPDXParsingError as err: + dict_to_fill["logger"].append(err.get_messages()) + except ValueError as err: + dict_to_fill["logger"].append(err.args[0]) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index cd9b2eb98..64237b46e 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -11,7 +11,7 @@ # limitations under the License. import re -from typing import Any, List, Dict, Optional, Callable +from typing import Any, List, Dict from license_expression import get_spdx_licensing from ply import yacc @@ -35,7 +35,7 @@ from spdx.parser.logger import Logger from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer -from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text, parse_checksum +from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text, parse_checksum, set_value CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", Package="packages", ExtractedLicensingInfo="extracted_licensing_info") @@ -84,16 +84,16 @@ def p_start_attrib(self, p): "| relationship\n" # attributes for snippet "| snip_spdx_id\n| snip_name\n| snip_comment\n| snippet_attribution_text\n| snip_cr_text\n" - "| snip_lic_comment\n| snip_file_spdx_id\n| snip_lics_conc\n| snip_lics_info\n| snip_byte_range\n" + "| snip_lic_comment\n| file_spdx_id\n| snip_lics_conc\n| snip_lics_info\n| snip_byte_range\n" "| snip_line_range\n" # attributes for package - "| package_name\n| package_version\n| pkg_down_location\n| pkg_files_analyzed\n| pkg_home\n" - "| pkg_summary\n| pkg_src_info\n| pkg_file_name\n| pkg_supplier\n| pkg_orig\n| pkg_checksum\n" - "| pkg_verif\n| pkg_desc\n| pkg_comment\n| pkg_attribution_text\n| pkg_lic_decl\n| pkg_lic_conc\n" + "| package_name\n| package_version\n| download_location\n| pkg_files_analyzed\n| homepage\n" + "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n" + "| pkg_verif\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_lic_decl\n| pkg_lic_conc\n" "| pkg_lic_ff\n| pkg_lic_comment\n| pkg_cr_text\n| pkg_ext_ref\n| primary_package_purpose\n" "| built_date\n| release_date\n| valid_until_date\n" # attributes for extracted licensing info - "| extr_lic_id\n| extr_lic_text\n| extr_lic_name\n| lic_xref\n| lic_comment\n" + "| license_id\n| extracted_text\n| license_name\n| lic_xref\n| lic_comment\n" "| unknown_tag ") def 
p_attrib(self, p): pass @@ -138,25 +138,10 @@ def p_spdx_id(self, p): self.creation_info["spdx_id"] = p[2] # parsing methods for creation info / document level - def set_creation_info_value(self, parsed_value: Any, argument_name: Optional[str] = None, - method_to_apply: Callable = lambda x: x): - if not argument_name: - argument_name = str(parsed_value.slice[0]) - if argument_name in self.creation_info: - self.creation_info["logger"].append( - f"Multiple values for {parsed_value[1]} found. Line: {parsed_value.lineno(1)}") - return - self.creation_info[argument_name] = method_to_apply(parsed_value[2]) @grammar_rule("license_list_version : LIC_LIST_VER LINE") def p_license_list_version(self, p): - try: - if str(p.slice[0]) in self.creation_info: - self.creation_info["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") - return - self.creation_info[str(p.slice[0])] = Version.from_string(p[2]) - except ValueError as err: - self.creation_info["logger"].append(err.args[0]) + set_value(p, self.creation_info, method_to_apply=Version.from_string) @grammar_rule("license_list_version : LIC_LIST_VER error") def p_license_list_version_error(self, p): @@ -165,7 +150,7 @@ def p_license_list_version_error(self, p): @grammar_rule("document_comment : DOC_COMMENT text_or_line") def p_doc_comment(self, p): - self.set_creation_info_value(p) + set_value(p, self.creation_info) @grammar_rule("document_comment : DOC_COMMENT error") def p_doc_comment_error(self, p): @@ -174,7 +159,7 @@ def p_doc_comment_error(self, p): @grammar_rule("document_namespace : DOC_NAMESPACE LINE") def p_doc_namespace(self, p): - self.set_creation_info_value(p) + set_value(p, self.creation_info) @grammar_rule("document_namespace : DOC_NAMESPACE error") def p_doc_namespace_error(self, p): @@ -183,7 +168,7 @@ def p_doc_namespace_error(self, p): @grammar_rule("data_license : DOC_LICENSE LINE") def p_data_license(self, p): - self.set_creation_info_value(p) + set_value(p, self.creation_info) @grammar_rule("data_license : DOC_LICENSE error") def p_data_license_error(self, p): @@ -192,7 +177,7 @@ def p_data_license_error(self, p): @grammar_rule("doc_name : DOC_NAME LINE") def p_doc_name(self, p): - self.set_creation_info_value(p, "name") + set_value(p, self.creation_info, "name") @grammar_rule("doc_name : DOC_NAME error") def p_doc_name_error(self, p): @@ -214,7 +199,7 @@ def p_external_document_ref_error(self, p): @grammar_rule("spdx_version : DOC_VERSION LINE") def p_spdx_version(self, p): - self.set_creation_info_value(p) + set_value(p, self.creation_info) @grammar_rule("spdx_version : DOC_VERSION error") def p_spdx_version_error(self, p): @@ -223,7 +208,7 @@ def p_spdx_version_error(self, p): @grammar_rule("creator_comment : CREATOR_COMMENT text_or_line") def p_creator_comment(self, p): - self.set_creation_info_value(p) + set_value(p, self.creation_info) @grammar_rule("creator_comment : CREATOR_COMMENT error") def p_creator_comment_error(self, p): @@ -241,7 +226,7 @@ def p_creator_error(self, p): @grammar_rule("created : CREATED DATE") def p_created(self, p): - self.set_creation_info_value(p, method_to_apply=datetime_from_str) + set_value(p, self.creation_info, method_to_apply=datetime_from_str) @grammar_rule("created : CREATED error") def p_created_error(self, p): @@ -250,12 +235,12 @@ def p_created_error(self, p): # parsing methods for extracted licensing info - @grammar_rule("extr_lic_id : LICS_ID LINE") + @grammar_rule("license_id : LICS_ID LINE") def p_extracted_license_id(self, p): 
self.initialize_new_current_element(ExtractedLicensingInfo) - self.current_element["license_id"] = p[2] + set_value(p, self.current_element) - @grammar_rule("extr_lic_id : LICS_ID error") + @grammar_rule("license_id : LICS_ID error") def p_extracted_license_id_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseID: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -274,29 +259,29 @@ def p_extracted_cross_reference_error(self, p): @grammar_rule("lic_comment : LICS_COMMENT text_or_line") def p_license_comment(self, p): self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) - self.current_element["comment"] = p[2] + set_value(p, self.current_element, argument_name="comment") @grammar_rule("lic_comment : LICS_COMMENT error") def p_license_comment_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("extr_lic_name : LICS_NAME line_or_no_assertion") + @grammar_rule("license_name : LICS_NAME line_or_no_assertion") def p_extracted_license_name(self, p): self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) - self.current_element["license_name"] = p[2] + set_value(p, self.current_element) - @grammar_rule("extr_lic_name : LICS_NAME error") + @grammar_rule("license_name : LICS_NAME error") def p_extracted_license_name_error(self, p): self.current_element["logger"].append( f"Error while parsing LicenseName: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("extr_lic_text : LICS_TEXT text_or_line") + @grammar_rule("extracted_text : LICS_TEXT text_or_line") def p_extracted_license_text(self, p): self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) - self.current_element["extracted_text"] = p[2] + set_value(p, self.current_element) - @grammar_rule("extr_lic_text : LICS_TEXT error") + @grammar_rule("extracted_text : LICS_TEXT error") def p_extracted_license_text_error(self, p): self.current_element["logger"].append( f"Error while parsing ExtractedText: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @@ -306,7 +291,7 @@ def p_extracted_license_text_error(self, p): @grammar_rule("file_name : FILE_NAME LINE") def p_file_name(self, p): self.initialize_new_current_element(File) - self.current_element["name"] = p[2] + set_value(p, self.current_element, argument_name="name") @grammar_rule("file_name : FILE_NAME error") def p_file_name_error(self, p): @@ -327,7 +312,7 @@ def p_file_contributor_error(self, p): @grammar_rule("file_notice : FILE_NOTICE text_or_line") def p_file_notice(self, p): self.check_that_current_element_matches_class_for_value(File) - self.current_element["notice"] = p[2] + set_value(p, self.current_element, argument_name="notice") @grammar_rule("file_notice : FILE_NOTICE error") def p_file_notice_error(self, p): @@ -337,7 +322,7 @@ def p_file_notice_error(self, p): @grammar_rule("file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none") def p_file_copyright_text(self, p): self.check_that_current_element_matches_class_for_value(File) - self.current_element["copyright_text"] = p[2] + set_value(p, self.current_element, argument_name="copyright_text") @grammar_rule("file_cr_text : FILE_CR_TEXT error") def p_file_copyright_text_error(self, p): @@ -347,7 +332,7 @@ def p_file_copyright_text_error(self, p): @grammar_rule("file_lics_comment : FILE_LICS_COMMENT text_or_line") def p_file_license_comment(self, p): self.check_that_current_element_matches_class_for_value(File) - self.current_element["license_comment"] = p[2] + set_value(p, self.current_element, argument_name="license_comment") @grammar_rule("file_lics_comment : FILE_LICS_COMMENT error") def p_file_license_comment_error(self, p): @@ -381,7 +366,7 @@ def p_file_license_info_error(self, p): @grammar_rule("file_comment : FILE_COMMENT text_or_line") def p_file_comment(self, p): self.check_that_current_element_matches_class_for_value(File) - self.current_element["comment"] = p[2] + set_value(p, self.current_element, argument_name="comment") @grammar_rule("file_comment : FILE_COMMENT error") def p_file_comment_error(self, p): @@ -418,7 +403,7 @@ def p_file_checksum_error(self, p): @grammar_rule("file_conc : FILE_LICS_CONC license_or_no_assertion_or_none") def p_file_license_concluded(self, p): self.check_that_current_element_matches_class_for_value(File) - self.current_element["license_concluded"] = p[2] + set_value(p, self.current_element, argument_name="license_concluded") @grammar_rule("file_conc : FILE_LICS_CONC error") def p_file_license_concluded_error(self, p): @@ -431,7 +416,7 @@ def p_file_license_concluded_error(self, p): @grammar_rule("package_name : PKG_NAME LINE") def p_package_name(self, p): self.initialize_new_current_element(Package) - self.current_element["name"] = p[2] + set_value(p, self.current_element, argument_name="name") @grammar_rule("package_name : PKG_NAME error") def p_package_name_error(self, p): @@ -439,12 +424,12 @@ def p_package_name_error(self, p): self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - @grammar_rule("pkg_desc : PKG_DESC text_or_line") + @grammar_rule("description : PKG_DESC text_or_line") def p_pkg_description(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["description"] = p[2] + set_value(p, self.current_element) - @grammar_rule("pkg_desc : PKG_DESC error") + @grammar_rule("description : PKG_DESC error") def p_pkg_description_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageDescription: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -452,7 +437,7 @@ def p_pkg_description_error(self, p): @grammar_rule("pkg_comment : PKG_COMMENT text_or_line") def p_pkg_comment(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["comment"] = p[2] + set_value(p, self.current_element, argument_name="comment") @grammar_rule("pkg_comment : PKG_COMMENT error") def p_pkg_comment_error(self, p): @@ -470,12 +455,12 @@ def p_pkg_attribution_text_error(self, p): f"Error while parsing PackageAttributionText: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") - @grammar_rule("pkg_summary : PKG_SUM text_or_line") + @grammar_rule("summary : PKG_SUM text_or_line") def p_pkg_summary(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["summary"] = p[2] + set_value(p, self.current_element) - @grammar_rule("pkg_summary : PKG_SUM error") + @grammar_rule("summary : PKG_SUM error") def p_pkg_summary_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageSummary: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -483,7 +468,7 @@ def p_pkg_summary_error(self, p): @grammar_rule("pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none") def p_pkg_copyright_text(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["copyright_text"] = p[2] + set_value(p, self.current_element, argument_name="copyright_text") @grammar_rule("pkg_cr_text : PKG_CPY_TEXT error") def p_pkg_copyright_text_error(self, p): @@ -494,7 +479,12 @@ def p_pkg_copyright_text_error(self, p): @grammar_rule("pkg_ext_ref : PKG_EXT_REF LINE PKG_EXT_REF_COMMENT text_or_line\n | PKG_EXT_REF LINE") def p_pkg_external_refs(self, p): self.check_that_current_element_matches_class_for_value(Package) - category, reference_type, locator = p[2].split(" ") + try: + category, reference_type, locator = p[2].split(" ") + except ValueError: + self.current_element["logger"].append( + f"Couldn't split PackageExternalRef in category, reference_type and locator. 
Line: {p.lineno(1)}") + return comment = None if len(p) == 5: comment = p[4] @@ -523,7 +513,7 @@ def p_pkg_external_refs_error(self, p): @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT text_or_line") def p_pkg_license_comment(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["license_comment"] = p[2] + set_value(p, self.current_element, argument_name="license_comment") @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT error") def p_pkg_license_comment_error(self, p): @@ -534,7 +524,7 @@ def p_pkg_license_comment_error(self, p): @grammar_rule("pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none") def p_pkg_license_declared(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["license_declared"] = p[2] + set_value(p, self.current_element, argument_name="license_declared") @grammar_rule("pkg_lic_decl : PKG_LICS_DECL error") def p_pkg_license_declared_error(self, p): @@ -559,7 +549,7 @@ def p_pkg_license_info_from_file_error(self, p): @grammar_rule("pkg_lic_conc : PKG_LICS_CONC license_or_no_assertion_or_none") def p_pkg_license_concluded(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["license_concluded"] = p[2] + set_value(p, self.current_element, argument_name="license_concluded") @grammar_rule("pkg_lic_conc : PKG_LICS_CONC error") def p_pkg_license_concluded_error(self, p): @@ -567,12 +557,12 @@ def p_pkg_license_concluded_error(self, p): f"Error while parsing LicenseConcluded in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") - @grammar_rule("pkg_src_info : PKG_SRC_INFO text_or_line") + @grammar_rule("source_info : PKG_SRC_INFO text_or_line") def p_pkg_source_info(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["source_info"] = p[2] + set_value(p, self.current_element) - @grammar_rule("pkg_src_info : PKG_SRC_INFO error") + @grammar_rule("source_info : PKG_SRC_INFO error") def p_pkg_source_info_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageSourceInfo: Token did not match specified grammar rule. Line: {p.lineno(1)}") @@ -607,22 +597,22 @@ def p_pkg_verification_code_error(self, p): f"Error while parsing PackageVerificationCode: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") - @grammar_rule("pkg_home : PKG_HOME line_or_no_assertion_or_none") + @grammar_rule("homepage : PKG_HOME line_or_no_assertion_or_none") def p_pkg_homepage(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["homepage"] = p[2] + set_value(p, self.current_element) - @grammar_rule("pkg_home : PKG_HOME error") + @grammar_rule("homepage : PKG_HOME error") def p_pkg_homepage_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageHomePage: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - @grammar_rule("pkg_down_location : PKG_DOWN line_or_no_assertion_or_none") + @grammar_rule("download_location : PKG_DOWN line_or_no_assertion_or_none") def p_pkg_download_location(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["download_location"] = p[2] + set_value(p, self.current_element) - @grammar_rule("pkg_down_location : PKG_DOWN error") + @grammar_rule("download_location : PKG_DOWN error") def p_pkg_download_location_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageDownloadLocation: Token did not match specified grammar rule. " @@ -642,22 +632,22 @@ def p_pkg_files_analyzed_error(self, p): f"Error while parsing FilesAnalyzed in package: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") - @grammar_rule("pkg_orig : PKG_ORIG actor_or_no_assertion") + @grammar_rule("originator : PKG_ORIG actor_or_no_assertion") def p_pkg_originator(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["originator"] = p[2] + set_value(p, self.current_element) - @grammar_rule("pkg_orig : PKG_ORIG error") + @grammar_rule("originator : PKG_ORIG error") def p_pkg_originator_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageOriginator: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("pkg_supplier : PKG_SUPPL actor_or_no_assertion") + @grammar_rule("supplier : PKG_SUPPL actor_or_no_assertion") def p_pkg_supplier(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["supplier"] = p[2] + set_value(p, self.current_element) - @grammar_rule("pkg_supplier : PKG_SUPPL error") + @grammar_rule("supplier : PKG_SUPPL error") def p_pkg_supplier_error(self, p): self.current_element["logger"].append( f"Error while parsing PackageSupplier: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @@ -665,7 +655,7 @@ def p_pkg_supplier_error(self, p): @grammar_rule("pkg_file_name : PKG_FILE_NAME LINE") def p_pkg_file_name(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["file_name"] = p[2] + set_value(p, self.current_element, argument_name="file_name") @grammar_rule("pkg_file_name : PKG_FILE_NAME error") def p_pkg_file_name_error(self, p): @@ -675,7 +665,7 @@ def p_pkg_file_name_error(self, p): @grammar_rule("package_version : PKG_VERSION LINE") def p_package_version(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["version"] = p[2] + set_value(p, self.current_element, argument_name="version") @grammar_rule("package_version : PKG_VERSION error") def p_package_version_error(self, p): @@ -685,7 +675,7 @@ def p_package_version_error(self, p): @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE primary_package_purpose_value") def p_primary_package_purpose(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["primary_package_purpose"] = PackagePurpose[p[2].replace("-", "_")] + set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")]) @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error") def p_primary_package_purpose_error(self, p): @@ -701,7 +691,7 @@ def p_primary_package_purpose_value(self, p): @grammar_rule("built_date : BUILT_DATE DATE") def p_built_date(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["built_date"] = datetime_from_str(p[2]) + set_value(p, self.current_element, method_to_apply=datetime_from_str) @grammar_rule("built_date : BUILT_DATE error") def p_built_date_error(self, p): @@ -711,7 +701,7 @@ def p_built_date_error(self, p): @grammar_rule("release_date : RELEASE_DATE DATE") def p_release_date(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["release_date"] = datetime_from_str(p[2]) + set_value(p, self.current_element, method_to_apply=datetime_from_str) @grammar_rule("release_date : RELEASE_DATE error") def p_release_date_error(self, p): @@ -721,7 +711,7 @@ def p_release_date_error(self, p): @grammar_rule("valid_until_date : VALID_UNTIL_DATE DATE") def p_valid_until_date(self, p): self.check_that_current_element_matches_class_for_value(Package) - self.current_element["valid_until_date"] = datetime_from_str(p[2]) + set_value(p, self.current_element, method_to_apply=datetime_from_str) @grammar_rule("valid_until_date : VALID_UNTIL_DATE error") def p_valid_until_date_error(self, p): @@ -732,7 +722,7 @@ def p_valid_until_date_error(self, p): @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID LINE") def p_snippet_spdx_id(self, p): self.initialize_new_current_element(Snippet) - self.current_element["spdx_id"] = p[2] + set_value(p, self.current_element, argument_name="spdx_id") @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID error") def p_snippet_spdx_id_error(self, p): @@ -743,7 +733,7 @@ def p_snippet_spdx_id_error(self, p): @grammar_rule("snip_name : SNIPPET_NAME LINE") def p_snippet_name(self, p): self.check_that_current_element_matches_class_for_value(Snippet) - self.current_element["name"] = p[2] + set_value(p, self.current_element, argument_name="name") @grammar_rule("snip_name : SNIPPET_NAME error") def p_snippet_name_error(self, p): @@ -753,7 +743,7 @@ def p_snippet_name_error(self, p): @grammar_rule("snip_comment : SNIPPET_COMMENT 
text_or_line") def p_snippet_comment(self, p): self.check_that_current_element_matches_class_for_value(Snippet) - self.current_element["comment"] = p[2] + set_value(p, self.current_element, argument_name="comment") @grammar_rule("snip_comment : SNIPPET_COMMENT error") def p_snippet_comment_error(self, p): @@ -774,7 +764,7 @@ def p_snippet_attribution_text_error(self, p): @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none") def p_snippet_copyright_text(self, p): self.check_that_current_element_matches_class_for_value(Snippet) - self.current_element["copyright_text"] = p[2] + set_value(p, self.current_element, argument_name="copyright_text") @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT error") def p_snippet_copyright_text_error(self, p): @@ -785,7 +775,7 @@ def p_snippet_copyright_text_error(self, p): @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT text_or_line") def p_snippet_license_comment(self, p): self.check_that_current_element_matches_class_for_value(Snippet) - self.current_element["license_comment"] = p[2] + set_value(p, self.current_element, argument_name="license_comment") @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT error") def p_snippet_license_comment_error(self, p): @@ -793,12 +783,12 @@ def p_snippet_license_comment_error(self, p): f"Error while parsing SnippetLicenseComments: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") - @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID LINE") + @grammar_rule("file_spdx_id : SNIPPET_FILE_SPDXID LINE") def p_snippet_from_file_spdxid(self, p): self.check_that_current_element_matches_class_for_value(Snippet) - self.current_element["file_spdx_id"] = p[2] + set_value(p, self.current_element) - @grammar_rule("snip_file_spdx_id : SNIPPET_FILE_SPDXID error") + @grammar_rule("file_spdx_id : SNIPPET_FILE_SPDXID error") def p_snippet_from_file_spdxid_error(self, p): self.current_element["logger"].append( f"Error while parsing SnippetFromFileSPDXID: Token did not match specified grammar rule. 
" @@ -807,7 +797,7 @@ def p_snippet_from_file_spdxid_error(self, p): @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC license_or_no_assertion_or_none") def p_snippet_concluded_license(self, p): self.check_that_current_element_matches_class_for_value(Snippet) - self.current_element["license_concluded"] = p[2] + set_value(p, self.current_element, argument_name="license_concluded") @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC error") def p_snippet_concluded_license_error(self, p): @@ -866,10 +856,7 @@ def p_snippet_line_range_error(self, p): def p_annotator(self, p): """annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE""" self.initialize_new_current_element(Annotation) - try: - self.current_element["annotator"] = ActorParser.parse_actor(p[2]) - except SPDXParsingError as err: - self.current_element["logger"].append(err.get_messages()) + set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor) @grammar_rule("annotator : ANNOTATOR error") def p_annotator_error(self, p): @@ -880,7 +867,7 @@ def p_annotator_error(self, p): @grammar_rule("annotation_date : ANNOTATION_DATE DATE") def p_annotation_date(self, p): self.check_that_current_element_matches_class_for_value(Annotation) - self.current_element["annotation_date"] = datetime_from_str(p[2]) + set_value(p, self.current_element, method_to_apply=datetime_from_str) @grammar_rule("annotation_date : ANNOTATION_DATE error") def p_annotation_date_error(self, p): @@ -890,7 +877,7 @@ def p_annotation_date_error(self, p): @grammar_rule("annotation_comment : ANNOTATION_COMMENT text_or_line") def p_annotation_comment(self, p): self.check_that_current_element_matches_class_for_value(Annotation) - self.current_element["annotation_comment"] = p[2] + set_value(p, self.current_element) @grammar_rule("annotation_comment : ANNOTATION_COMMENT error") def p_annotation_comment_error(self, p): @@ -900,7 +887,7 @@ def p_annotation_comment_error(self, p): @grammar_rule("annotation_type : ANNOTATION_TYPE annotation_type_value") def p_annotation_type(self, p): self.check_that_current_element_matches_class_for_value(Annotation) - self.current_element["annotation_type"] = AnnotationType[p[2]] + set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x]) @grammar_rule("annotation_type : ANNOTATION_TYPE error") def p_annotation_type_error(self, p): @@ -913,7 +900,7 @@ def p_annotation_type_value(self, p): @grammar_rule("annotation_spdx_id : ANNOTATION_SPDX_ID LINE") def p_annotation_spdx_id(self, p): - self.current_element["spdx_id"] = p[2] + set_value(p, self.current_element, argument_name="spdx_id") @grammar_rule("annotation_spdx_id : ANNOTATION_SPDX_ID error") def p_annotation_spdx_id_error(self, p): From 1b8d9a12a3b13f80994a626cecc36e6f40f754de Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 1 Mar 2023 16:25:23 +0100 Subject: [PATCH 26/43] [refactor] merge parsing methods for package dates Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 64237b46e..6eec4efa1 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -688,7 +688,8 @@ def p_primary_package_purpose_error(self, p): def p_primary_package_purpose_value(self, p): p[0] = p[1] - @grammar_rule("built_date : BUILT_DATE DATE") + @grammar_rule("built_date : BUILT_DATE DATE\n release_date : RELEASE_DATE 
DATE\n " + "valid_until_date : VALID_UNTIL_DATE DATE") def p_built_date(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, method_to_apply=datetime_from_str) @@ -698,21 +699,11 @@ def p_built_date_error(self, p): self.current_element["logger"].append( f"Error while parsing BuiltDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("release_date : RELEASE_DATE DATE") - def p_release_date(self, p): - self.check_that_current_element_matches_class_for_value(Package) - set_value(p, self.current_element, method_to_apply=datetime_from_str) - @grammar_rule("release_date : RELEASE_DATE error") def p_release_date_error(self, p): self.current_element["logger"].append( f"Error while parsing ReleaseDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("valid_until_date : VALID_UNTIL_DATE DATE") - def p_valid_until_date(self, p): - self.check_that_current_element_matches_class_for_value(Package) - set_value(p, self.current_element, method_to_apply=datetime_from_str) - @grammar_rule("valid_until_date : VALID_UNTIL_DATE error") def p_valid_until_date_error(self, p): self.current_element["logger"].append( From 5b62711c3c3b854e15ad7e31440f2b5e6a770332 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Thu, 2 Mar 2023 08:51:38 +0100 Subject: [PATCH 27/43] [issue-382] rename tests Signed-off-by: Meret Behrens --- tests/spdx/parser/tagvalue/test_creation_info_parser.py | 4 ++-- .../parser/tagvalue/test_extracted_licensing_info_parser.py | 4 ++-- tests/spdx/parser/tagvalue/test_file_parser.py | 4 ++-- tests/spdx/parser/tagvalue/test_package_parser.py | 2 +- tests/spdx/parser/tagvalue/test_relationship_parser.py | 4 ++-- tests/spdx/parser/tagvalue/test_snippet_parser.py | 2 +- tests/spdx/parser/tagvalue/test_tag_value_parser.py | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/spdx/parser/tagvalue/test_creation_info_parser.py b/tests/spdx/parser/tagvalue/test_creation_info_parser.py index d36e822c6..de5a794c3 100644 --- a/tests/spdx/parser/tagvalue/test_creation_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_creation_info_parser.py @@ -36,7 +36,7 @@ ]) -def test_creation_info(): +def test_parse_creation_info(): parser = Parser() document = parser.parse(DOCUMENT_STR) assert document is not None @@ -85,7 +85,7 @@ def test_creation_info(): ('LicenseListVersion: 3.5\nLicenseListVersion: 3.7', [["Error while parsing CreationInfo: ['Multiple values for LicenseListVersion " "found. 
Line: 2']"]])])) -def test_invalid_creation_info(document_str, expected_message): +def test_parse_invalid_creation_info(document_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: parser.parse(document_str) diff --git a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py index 753577cb2..ffffcafb5 100644 --- a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py @@ -17,7 +17,7 @@ from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -def test_extracted_licensing_info(): +def test_parse_extracted_licensing_info(): parser = Parser() extracted_licensing_info_str = '\n'.join([ 'LicenseID: LicenseRef-Beerware-4.2', @@ -43,7 +43,7 @@ def test_extracted_licensing_info(): assert extracted_licensing_info.comment == "The beerware license has a couple of other standard variants." -def test_parse_invalid_licensing_info(): +def test_parse_invalid_extracted_licensing_info(): parser = Parser() extracted_licensing_info_str = '\n'.join([ 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' diff --git a/tests/spdx/parser/tagvalue/test_file_parser.py b/tests/spdx/parser/tagvalue/test_file_parser.py index ec5a9df5d..1b37b2b5a 100644 --- a/tests/spdx/parser/tagvalue/test_file_parser.py +++ b/tests/spdx/parser/tagvalue/test_file_parser.py @@ -17,7 +17,7 @@ from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -def test_file(): +def test_parse_file(): parser = Parser() file_str = '\n'.join([ 'FileName: testfile.java', @@ -45,7 +45,7 @@ def test_file(): assert spdx_file.license_concluded == get_spdx_licensing().parse("Apache-2.0") -def test_invalid_file(): +def test_parse_invalid_file(): parser = Parser() file_str = '\n'.join([ 'FileName: testfile.java', diff --git a/tests/spdx/parser/tagvalue/test_package_parser.py b/tests/spdx/parser/tagvalue/test_package_parser.py index c17516610..eeb97af55 100644 --- a/tests/spdx/parser/tagvalue/test_package_parser.py +++ b/tests/spdx/parser/tagvalue/test_package_parser.py @@ -18,7 +18,7 @@ from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -def test_package(): +def test_parse_package(): parser = Parser() package_str = '\n'.join([ 'PackageName: Test', diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py index c60e7eb76..f6776299b 100644 --- a/tests/spdx/parser/tagvalue/test_relationship_parser.py +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -32,7 +32,7 @@ Relationship("DocumentRef-ExternalDocument:SPDXRef-Test", RelationshipType.DEPENDS_ON, "DocumentRef:AnotherRef")) ]) -def test_relationship(relationship_str, expected_relationship): +def test_parse_relationship(relationship_str, expected_relationship): parser = Parser() document = parser.parse("\n".join([DOCUMENT_STR, relationship_str])) assert document is not None @@ -50,7 +50,7 @@ def test_relationship(relationship_str, expected_relationship): [["Error while parsing Relationship: ['Error while parsing Relationship: Token " "did not match specified grammar rule. 
Line: 1']"]]) ]) -def test_falsy_relationship(relationship_str, expected_message): +def test_parse_invalid_relationship(relationship_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: parser.parse(relationship_str) diff --git a/tests/spdx/parser/tagvalue/test_snippet_parser.py b/tests/spdx/parser/tagvalue/test_snippet_parser.py index 80a10bd40..d8d95202d 100644 --- a/tests/spdx/parser/tagvalue/test_snippet_parser.py +++ b/tests/spdx/parser/tagvalue/test_snippet_parser.py @@ -14,7 +14,7 @@ from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR -def test_snippet(): +def test_parse_snippet(): parser = Parser() snippet_str = '\n'.join([ 'SnippetSPDXID: SPDXRef-Snippet', diff --git a/tests/spdx/parser/tagvalue/test_tag_value_parser.py b/tests/spdx/parser/tagvalue/test_tag_value_parser.py index af88aab53..e96fa9c80 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_parser.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_parser.py @@ -18,7 +18,7 @@ from spdx.parser.tagvalue.parser.tagvalue import Parser -def test_unknown_str(): +def test_parse_unknown_tag(): parser = Parser() unknown_tag_str = 'UnknownTag: This is an example for an unknown tag.' @@ -26,7 +26,7 @@ def test_unknown_str(): parser.parse(unknown_tag_str) -def test_parse_file(): +def test_tag_value_parser(): parser = Parser() fn = os.path.join(os.path.dirname(__file__), "../../data/formats/SPDXTagExample-v2.3.spdx") From c4d1ed543beb2b9b4f7a9f77c5b890bd5edbfa90 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Thu, 2 Mar 2023 09:08:01 +0100 Subject: [PATCH 28/43] [issue-382] add negative tests for snippet_parser Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 8 ++++- .../parser/tagvalue/test_snippet_parser.py | 30 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 6eec4efa1..9932e0a60 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -806,7 +806,6 @@ def p_snippet_license_info(self, p): @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO error") def p_snippet_license_info_error(self, p): - self.current_element["logger"].append( f"Error while parsing LicenseInfoInSnippet: Token did not match specified grammar rule. " f"Line: {p.lineno(1)}") @@ -814,6 +813,9 @@ def p_snippet_license_info_error(self, p): @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE") def p_snippet_byte_range(self, p): self.check_that_current_element_matches_class_for_value(Snippet) + if "byte_range" in self.current_element: + self.current_element["logger"].append( + f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) if not range_re.match(p[2].strip()): self.current_element["logger"].append("Value for SnippetByteRange doesn't match valid range pattern.") @@ -830,6 +832,10 @@ def p_snippet_byte_range_error(self, p): @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE LINE") def p_snippet_line_range(self, p): self.check_that_current_element_matches_class_for_value(Snippet) + if "line_range" in self.current_element: + self.current_element["logger"].append( + f"Multiple values for {p[1]} found. 
Line: {p.lineno(1)}") + return range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) if not range_re.match(p[2].strip()): self.current_element["logger"].append("Value for SnippetLineRange doesn't match valid range pattern.") diff --git a/tests/spdx/parser/tagvalue/test_snippet_parser.py b/tests/spdx/parser/tagvalue/test_snippet_parser.py index d8d95202d..a6f84d415 100644 --- a/tests/spdx/parser/tagvalue/test_snippet_parser.py +++ b/tests/spdx/parser/tagvalue/test_snippet_parser.py @@ -8,8 +8,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from unittest import TestCase + +import pytest from license_expression import get_spdx_licensing +from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR @@ -27,6 +31,8 @@ def test_parse_snippet(): 'LicenseInfoInSnippet: Apache-2.0', 'SnippetByteRange: 310:420', 'SnippetLineRange: 5:23', + 'SnippetAttributionText: This is a text\nthat spans multiple lines.', + 'SnippetAttributionText: This text spans one line but has trailing and leading whitespaces. ' ]) document = parser.parse("\n".join([DOCUMENT_STR, snippet_str])) @@ -45,3 +51,27 @@ def test_parse_snippet(): assert snippet.byte_range[1] == 420 assert snippet.line_range[0] == 5 assert snippet.line_range[1] == 23 + TestCase().assertCountEqual( + snippet.attribution_texts, ["This is a text\nthat spans multiple lines.", + "This text spans one line but has trailing and leading whitespaces."]) + + +@pytest.mark.parametrize("snippet_str, expected_message", [ + ('SnippetName: TestSnippet', ['Element Snippet is not the current element in scope, probably the expected ' + 'tag to start the element (SnippetSPDXID) is missing.']), + ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1,4', + [['Error while parsing Snippet: ["Value for SnippetByteRange doesn\'t match ' + 'valid range pattern."]']]), + ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1:4\nSnippetByteRange:10:23', + [["Error while parsing Snippet: ['Multiple values for SnippetByteRange found. 
" + "Line: 3']"]]), + ('SnippetSPDXID: SPDXRef-Snippet', [['Error while constructing Snippet: Snippet.__init__() missing 2 required ' + "positional arguments: 'file_spdx_id' and 'byte_range'"]]) +]) +def test_parse_invalid_snippet(snippet_str, expected_message): + parser = Parser() + + with pytest.raises(SPDXParsingError) as err: + parser.parse(snippet_str) + + assert err.value.get_messages() == expected_message From d545654051bb9722c2afab1bc50f56e2ca4838dc Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Fri, 3 Mar 2023 08:47:03 +0100 Subject: [PATCH 29/43] [issue-382] add negative tests for package parser Signed-off-by: Meret Behrens --- .../parser/tagvalue/test_package_parser.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/spdx/parser/tagvalue/test_package_parser.py b/tests/spdx/parser/tagvalue/test_package_parser.py index eeb97af55..9f2de8997 100644 --- a/tests/spdx/parser/tagvalue/test_package_parser.py +++ b/tests/spdx/parser/tagvalue/test_package_parser.py @@ -11,9 +11,11 @@ from datetime import datetime from unittest import TestCase +import pytest from license_expression import get_spdx_licensing from spdx.model.package import ExternalPackageRef, ExternalPackageRefCategory, PackagePurpose +from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR @@ -70,3 +72,38 @@ def test_parse_package(): assert package.built_date == datetime(2020, 1, 1, 12, 0, 0) assert package.release_date == datetime(2021, 1, 1, 12, 0, 0) assert package.valid_until_date == datetime(2022, 1, 1, 12, 0, 0) + + +@pytest.mark.parametrize("package_str, expected_message", + [('PackageDownloadLocation: SPDXRef-Package', + ['Element Package is not the current element in scope, probably the expected ' + 'tag to start the element (PackageName) is missing.']), + ('PackageName: TestPackage', + [['Error while constructing Package: Package.__init__() missing 2 required ' + "positional arguments: 'spdx_id' and 'download_location'"]]), + ('PackageName: TestPackage\nPackageCopyrightText:This is a copyright\n' + 'PackageCopyrightText:MultipleCopyright', + [["Error while parsing Package: ['Multiple values for PackageCopyrightText " + "found. Line: 3']"]]), + ('PackageName: TestPackage\nExternalRef: reference locator', + [['Error while parsing Package: ["Couldn\'t split PackageExternalRef in ' + 'category, reference_type and locator. Line: 2"]']]), + ('PackageName: TestPackage\nExternalRef: category reference locator', + [["Error while parsing Package: ['Invalid ExternalPackageRefCategory: " + "category']"]]), + ('SPDXID:SPDXRef-DOCUMENT\nPackageName: TestPackage\nSPDXID:SPDXRef-Package\n' + 'PackageDownloadLocation: download.com\nPackageVerificationCode: category reference locator', + [["Error while parsing Package: ['Error while parsing PackageVerificationCode: " + "Value did not match expected format. Line: 5']"]]), + ('PackageName: TestPackage\nBuiltDate: 2012\nValidUntilDate:202-11-02T00:00', + [["Error while parsing Package: ['Error while parsing BuiltDate: Token did not " + "match specified grammar rule. Line: 2', 'Error while parsing " + "ValidUntilDate: Token did not match specified grammar rule. 
Line: 3']"]]) + ]) +def test_parse_invalid_package(package_str, expected_message): + parser = Parser() + + with pytest.raises(SPDXParsingError) as err: + parser.parse(package_str) + + assert err.value.get_messages() == expected_message From 69748da12caa837e4520cb12bed5c33e9bbabb81 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Fri, 3 Mar 2023 08:47:14 +0100 Subject: [PATCH 30/43] [issue-382] merge parsing methods Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 497 ++++-------------- .../spdx/parser/tagvalue/test_file_parser.py | 4 +- 2 files changed, 90 insertions(+), 411 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 9932e0a60..9f8f78043 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -87,9 +87,9 @@ def p_start_attrib(self, p): "| snip_lic_comment\n| file_spdx_id\n| snip_lics_conc\n| snip_lics_info\n| snip_byte_range\n" "| snip_line_range\n" # attributes for package - "| package_name\n| package_version\n| download_location\n| pkg_files_analyzed\n| homepage\n" + "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n" "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n" - "| pkg_verif\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_lic_decl\n| pkg_lic_conc\n" + "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_lic_decl\n| pkg_lic_conc\n" "| pkg_lic_ff\n| pkg_lic_comment\n| pkg_cr_text\n| pkg_ext_ref\n| primary_package_purpose\n" "| built_date\n| release_date\n| valid_until_date\n" # attributes for extracted licensing info @@ -139,50 +139,27 @@ def p_spdx_id(self, p): # parsing methods for creation info / document level - @grammar_rule("license_list_version : LIC_LIST_VER LINE") - def p_license_list_version(self, p): - set_value(p, self.creation_info, method_to_apply=Version.from_string) - - @grammar_rule("license_list_version : LIC_LIST_VER error") - def p_license_list_version_error(self, p): - self.creation_info["logger"].append( - f"Error while parsing LicenseListVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("document_comment : DOC_COMMENT text_or_line") - def p_doc_comment(self, p): - set_value(p, self.creation_info) - - @grammar_rule("document_comment : DOC_COMMENT error") - def p_doc_comment_error(self, p): - self.creation_info["logger"].append( - f"Error while parsing DocumentComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("document_namespace : DOC_NAMESPACE LINE") - def p_doc_namespace(self, p): - set_value(p, self.creation_info) - - @grammar_rule("document_namespace : DOC_NAMESPACE error") - def p_doc_namespace_error(self, p): + @grammar_rule("license_list_version : LIC_LIST_VER error\n document_comment : DOC_COMMENT error\n " + "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n " + "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n " + "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error") + def p_creation_info_value_error(self, p): self.creation_info["logger"].append( - f"Error while parsing DocumentNamespace: Token did not match specified grammar rule. Line: {p.lineno(1)}") + f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - @grammar_rule("data_license : DOC_LICENSE LINE") - def p_data_license(self, p): + @grammar_rule("document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n " + "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n " + "creator_comment : CREATOR_COMMENT text_or_line") + def p_generic_value_creation_info(self, p): set_value(p, self.creation_info) - @grammar_rule("data_license : DOC_LICENSE error") - def p_data_license_error(self, p): - self.creation_info["logger"].append( - f"Error while parsing DataLicense: Token did not match specified grammar rule. Line: {p.lineno(1)}") + @grammar_rule("license_list_version : LIC_LIST_VER LINE") + def p_license_list_version(self, p): + set_value(p, self.creation_info, method_to_apply=Version.from_string) @grammar_rule("doc_name : DOC_NAME LINE") def p_doc_name(self, p): - set_value(p, self.creation_info, "name") - - @grammar_rule("doc_name : DOC_NAME error") - def p_doc_name_error(self, p): - self.creation_info["logger"].append( - f"Error while parsing DocumentName: Token did not match specified grammar rule. Line: {p.lineno(1)}") + set_value(p, self.creation_info, argument_name="name") @grammar_rule("ext_doc_ref : EXT_DOC_REF DOC_REF_ID DOC_URI EXT_DOC_REF_CHECKSUM") def p_external_document_ref(self, p): @@ -192,101 +169,52 @@ def p_external_document_ref(self, p): external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum) self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) - @grammar_rule("ext_doc_ref : EXT_DOC_REF error") - def p_external_document_ref_error(self, p): - self.creation_info["logger"].append( - f"Error while parsing ExternalDocumentRef: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("spdx_version : DOC_VERSION LINE") - def p_spdx_version(self, p): - set_value(p, self.creation_info) - - @grammar_rule("spdx_version : DOC_VERSION error") - def p_spdx_version_error(self, p): - self.creation_info["logger"].append( - f"Error while parsing SPDXVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("creator_comment : CREATOR_COMMENT text_or_line") - def p_creator_comment(self, p): - set_value(p, self.creation_info) - - @grammar_rule("creator_comment : CREATOR_COMMENT error") - def p_creator_comment_error(self, p): - self.creation_info["logger"].append( - f"Error while parsing CreatorComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - def p_creator(self, p): """creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORG_VALUE""" self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2])) - @grammar_rule("creator : CREATOR error") - def p_creator_error(self, p): - self.creation_info["logger"].append( - f"Error while parsing Creator: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("created : CREATED DATE") def p_created(self, p): set_value(p, self.creation_info, method_to_apply=datetime_from_str) - @grammar_rule("created : CREATED error") - def p_created_error(self, p): - self.creation_info["logger"].append( - f"Error while parsing Created: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - # parsing methods for extracted licensing info + @grammar_rule("license_id : LICS_ID error\n lic_xref : LICS_CRS_REF error\n lic_comment : LICS_COMMENT error\n " + "license_name : LICS_NAME error\n extracted_text : LICS_TEXT error") + def p_extracted_licensing_info_value_error(self, p): + self.current_element["logger"].append( + f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("license_name : LICS_NAME line_or_no_assertion\n extracted_text : LICS_TEXT text_or_line") + def p_generic_value_extracted_licensing_info(self, p): + self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) + set_value(p, self.current_element) + @grammar_rule("license_id : LICS_ID LINE") def p_extracted_license_id(self, p): self.initialize_new_current_element(ExtractedLicensingInfo) set_value(p, self.current_element) - @grammar_rule("license_id : LICS_ID error") - def p_extracted_license_id_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseID: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("lic_xref : LICS_CRS_REF LINE") def p_extracted_cross_reference(self, p): self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) self.current_element.setdefault("cross_references", []).append(p[2]) - @grammar_rule("lic_xref : LICS_CRS_REF error") - def p_extracted_cross_reference_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseCrossReference: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("lic_comment : LICS_COMMENT text_or_line") def p_license_comment(self, p): self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) set_value(p, self.current_element, argument_name="comment") - @grammar_rule("lic_comment : LICS_COMMENT error") - def p_license_comment_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("license_name : LICS_NAME line_or_no_assertion") - def p_extracted_license_name(self, p): - self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) - set_value(p, self.current_element) - - @grammar_rule("license_name : LICS_NAME error") - def p_extracted_license_name_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseName: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("extracted_text : LICS_TEXT text_or_line") - def p_extracted_license_text(self, p): - self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) - set_value(p, self.current_element) + # parsing methods for file - @grammar_rule("extracted_text : LICS_TEXT error") - def p_extracted_license_text_error(self, p): + @grammar_rule("file_contrib : FILE_CONTRIB error\n file_notice : FILE_NOTICE error\n " + "file_cr_text : FILE_CR_TEXT error\n file_lics_comment : FILE_LICS_COMMENT error\n " + "file_attribution_text : FILE_ATTRIBUTION_TEXT error\n file_lics_info : FILE_LICS_INFO error\n " + "file_comment : FILE_COMMENT error\n file_checksum : FILE_CHECKSUM error\n " + "file_conc : FILE_LICS_CONC error\n file_type : FILE_TYPE error") + def p_file_value_error(self, p): self.current_element["logger"].append( - f"Error while parsing ExtractedText: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - - # parsing methods for file + f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("file_name : FILE_NAME LINE") def p_file_name(self, p): @@ -304,52 +232,26 @@ def p_file_contributor(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element.setdefault("contributors", []).append(p[2]) - @grammar_rule("file_contrib : FILE_CONTRIB error") - def p_file_contributor_error(self, p): - self.current_element["logger"].append( - f"Error while parsing FileContributor: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("file_notice : FILE_NOTICE text_or_line") def p_file_notice(self, p): self.check_that_current_element_matches_class_for_value(File) set_value(p, self.current_element, argument_name="notice") - @grammar_rule("file_notice : FILE_NOTICE error") - def p_file_notice_error(self, p): - self.current_element["logger"].append( - f"Error while parsing FileNotice: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none") def p_file_copyright_text(self, p): self.check_that_current_element_matches_class_for_value(File) set_value(p, self.current_element, argument_name="copyright_text") - @grammar_rule("file_cr_text : FILE_CR_TEXT error") - def p_file_copyright_text_error(self, p): - self.current_element["logger"].append( - f"Error while parsing FileCopyrightText: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("file_lics_comment : FILE_LICS_COMMENT text_or_line") def p_file_license_comment(self, p): self.check_that_current_element_matches_class_for_value(File) set_value(p, self.current_element, argument_name="license_comment") - @grammar_rule("file_lics_comment : FILE_LICS_COMMENT error") - def p_file_license_comment_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseComments in file: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line") def p_file_attribution_text(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element.setdefault("attribution_texts", []).append(p[2]) - @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT error") - def p_file_attribution_text_error(self, p): - self.current_element["logger"].append( - f"Error while parsing FileAttributionText: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("file_lics_info : FILE_LICS_INFO license_or_no_assertion_or_none") def p_file_license_info(self, p): self.check_that_current_element_matches_class_for_value(File) @@ -358,31 +260,16 @@ def p_file_license_info(self, p): return self.current_element.setdefault("license_info_in_file", []).append(p[2]) - @grammar_rule("file_lics_info : FILE_LICS_INFO error") - def p_file_license_info_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseInfoInFile: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - @grammar_rule("file_comment : FILE_COMMENT text_or_line") def p_file_comment(self, p): self.check_that_current_element_matches_class_for_value(File) set_value(p, self.current_element, argument_name="comment") - @grammar_rule("file_comment : FILE_COMMENT error") - def p_file_comment_error(self, p): - self.current_element["logger"].append( - f"Error while parsing FileComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("file_type : FILE_TYPE file_type_value") def p_file_type(self, p): self.check_that_current_element_matches_class_for_value(File) self.current_element.setdefault("file_type", []).append(FileType[p[2]]) - @grammar_rule("file_type : FILE_TYPE error") - def p_file_type_error(self, p): - self.current_element["logger"].append( - f"Error while parsing FileType: Token did not match any of the valid values. Line: {p.lineno(1)}") - @grammar_rule( "file_type_value : SOURCE\n| BINARY\n| ARCHIVE\n | APPLICATION\n | AUDIO\n | IMAGE\n | FILETYPE_TEXT\n| VIDEO\n" " | DOCUMENTATION\n| SPDX \n| OTHER ") @@ -395,23 +282,35 @@ def p_file_checksum(self, p): checksum = parse_checksum(self.current_element["logger"], p[2]) self.current_element.setdefault("checksums", []).append(checksum) - @grammar_rule("file_checksum : FILE_CHECKSUM error") - def p_file_checksum_error(self, p): - self.current_element["logger"].append( - f"Error while parsing Checksum in file: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("file_conc : FILE_LICS_CONC license_or_no_assertion_or_none") def p_file_license_concluded(self, p): self.check_that_current_element_matches_class_for_value(File) set_value(p, self.current_element, argument_name="license_concluded") - @grammar_rule("file_conc : FILE_LICS_CONC error") - def p_file_license_concluded_error(self, p): + # parsing methods for package + + @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n description : PKG_DESC error\n " + "pkg_comment : PKG_COMMENT error\n summary : PKG_SUM error\n pkg_cr_text : PKG_CPY_TEXT error\n " + "pkg_ext_ref : PKG_EXT_REF error\n pkg_lic_comment : PKG_LICS_COMMENT error\n " + "pkg_lic_decl : PKG_LICS_DECL error\n pkg_lic_ff : PKG_LICS_FFILE error \n " + "pkg_lic_conc : PKG_LICS_CONC error\n source_info : PKG_SRC_INFO error\n homepage : PKG_HOME error\n " + "pkg_checksum : PKG_CHECKSUM error\n verification_code : PKG_VERF_CODE error\n " + "download_location : PKG_DOWN error\n files_analyzed : PKG_FILES_ANALYZED error\n " + "originator : PKG_ORIG error\n supplier : PKG_SUPPL error\n pkg_file_name : PKG_FILE_NAME error\n " + "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n " + "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n " + "valid_until_date : VALID_UNTIL_DATE error") + def p_package_value_error(self, p): self.current_element["logger"].append( - f"Error while parsing LicenseConcluded in file: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") + f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - # parsing methods for package + @grammar_rule("description : PKG_DESC text_or_line\n summary : PKG_SUM text_or_line\n " + "source_info : PKG_SRC_INFO text_or_line\n homepage : PKG_HOME line_or_no_assertion_or_none\n " + "download_location : PKG_DOWN line_or_no_assertion_or_none\n " + "originator : PKG_ORIG actor_or_no_assertion\n supplier : PKG_SUPPL actor_or_no_assertion") + def p_generic_package_value(self, p): + self.check_that_current_element_matches_class_for_value(Package) + set_value(p, self.current_element) @grammar_rule("package_name : PKG_NAME LINE") def p_package_name(self, p): @@ -424,58 +323,21 @@ def p_package_name_error(self, p): self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("description : PKG_DESC text_or_line") - def p_pkg_description(self, p): - self.check_that_current_element_matches_class_for_value(Package) - set_value(p, self.current_element) - - @grammar_rule("description : PKG_DESC error") - def p_pkg_description_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageDescription: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("pkg_comment : PKG_COMMENT text_or_line") def p_pkg_comment(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, argument_name="comment") - @grammar_rule("pkg_comment : PKG_COMMENT error") - def p_pkg_comment_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line") def p_pkg_attribution_text(self, p): self.check_that_current_element_matches_class_for_value(Package) self.current_element.setdefault("attribution_texts", []).append(p[2]) - @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT error") - def p_pkg_attribution_text_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageAttributionText: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - - @grammar_rule("summary : PKG_SUM text_or_line") - def p_pkg_summary(self, p): - self.check_that_current_element_matches_class_for_value(Package) - set_value(p, self.current_element) - - @grammar_rule("summary : PKG_SUM error") - def p_pkg_summary_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageSummary: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none") def p_pkg_copyright_text(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, argument_name="copyright_text") - @grammar_rule("pkg_cr_text : PKG_CPY_TEXT error") - def p_pkg_copyright_text_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageCopyrightText: Token did not match specified grammar rule. 
" - f"Line: {p.lineno(1)}") - @grammar_rule("pkg_ext_ref : PKG_EXT_REF LINE PKG_EXT_REF_COMMENT text_or_line\n | PKG_EXT_REF LINE") def p_pkg_external_refs(self, p): self.check_that_current_element_matches_class_for_value(Package) @@ -504,34 +366,16 @@ def p_pkg_external_refs(self, p): return self.current_element.setdefault("external_references", []).append(external_package_ref) - @grammar_rule("pkg_ext_ref : PKG_EXT_REF error") - def p_pkg_external_refs_error(self, p): - self.current_element["logger"].append( - f"Error while parsing ExternalRef in package: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT text_or_line") def p_pkg_license_comment(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, argument_name="license_comment") - @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT error") - def p_pkg_license_comment_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageLicenseComments: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none") def p_pkg_license_declared(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, argument_name="license_declared") - @grammar_rule("pkg_lic_decl : PKG_LICS_DECL error") - def p_pkg_license_declared_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseDeclared in package: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE license_or_no_assertion_or_none") def p_pkg_license_info_from_file(self, p): self.check_that_current_element_matches_class_for_value(Package) @@ -540,149 +384,60 @@ def p_pkg_license_info_from_file(self, p): else: self.current_element.setdefault("license_info_from_files", []).append(p[2]) - @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE error") - def p_pkg_license_info_from_file_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseInfoFromFiles in package: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("pkg_lic_conc : PKG_LICS_CONC license_or_no_assertion_or_none") def p_pkg_license_concluded(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, argument_name="license_concluded") - @grammar_rule("pkg_lic_conc : PKG_LICS_CONC error") - def p_pkg_license_concluded_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseConcluded in package: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - - @grammar_rule("source_info : PKG_SRC_INFO text_or_line") - def p_pkg_source_info(self, p): - self.check_that_current_element_matches_class_for_value(Package) - set_value(p, self.current_element) - - @grammar_rule("source_info : PKG_SRC_INFO error") - def p_pkg_source_info_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageSourceInfo: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") def p_pkg_checksum(self, p): self.check_that_current_element_matches_class_for_value(Package) checksum = parse_checksum(self.current_element["logger"], p[2]) self.current_element.setdefault("checksums", []).append(checksum) - @grammar_rule("pkg_checksum : PKG_CHECKSUM error") - def p_pkg_checksum_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageChecksum: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("pkg_verif : PKG_VERF_CODE LINE") + @grammar_rule("verification_code : PKG_VERF_CODE LINE") def p_pkg_verification_code(self, p): self.check_that_current_element_matches_class_for_value(Package) - verif_code_regex = re.compile(r"([0-9a-f]+)\s*(\(excludes:\s*(.+)\))?", re.UNICODE) + if str(p.slice[0]) in self.current_element: + self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") + return + verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE) verif_code_code_grp = 1 verif_code_exc_files_grp = 3 match = verif_code_regex.match(p[2]) + if not match: + self.current_element["logger"].append( + f"Error while parsing {p[1]}: Value did not match expected format. Line: {p.lineno(1)}") + return value = match.group(verif_code_code_grp) excluded_files = None if match.group(verif_code_exc_files_grp): excluded_files = match.group(verif_code_exc_files_grp).split(",") - self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files) - - @grammar_rule("pkg_verif : PKG_VERF_CODE error") - def p_pkg_verification_code_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageVerificationCode: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - - @grammar_rule("homepage : PKG_HOME line_or_no_assertion_or_none") - def p_pkg_homepage(self, p): - self.check_that_current_element_matches_class_for_value(Package) - set_value(p, self.current_element) + self.current_element[str(p.slice[0])] = PackageVerificationCode(value, excluded_files) - @grammar_rule("homepage : PKG_HOME error") - def p_pkg_homepage_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageHomePage: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("download_location : PKG_DOWN line_or_no_assertion_or_none") - def p_pkg_download_location(self, p): - self.check_that_current_element_matches_class_for_value(Package) - set_value(p, self.current_element) - - @grammar_rule("download_location : PKG_DOWN error") - def p_pkg_download_location_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageDownloadLocation: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - - @grammar_rule("pkg_files_analyzed : PKG_FILES_ANALYZED LINE") + @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE") def p_pkg_files_analyzed(self, p): self.check_that_current_element_matches_class_for_value(Package) - if p[2] in ['false', 'False']: - self.current_element["files_analyzed"] = False - if p[2] in ['true', 'True']: - self.current_element["files_analyzed"] = True - - @grammar_rule("pkg_files_analyzed : PKG_FILES_ANALYZED error") - def p_pkg_files_analyzed_error(self, p): - self.current_element["logger"].append( - f"Error while parsing FilesAnalyzed in package: Token did not match specified grammar rule. 
" - f"Line: {p.lineno(1)}") - - @grammar_rule("originator : PKG_ORIG actor_or_no_assertion") - def p_pkg_originator(self, p): - self.check_that_current_element_matches_class_for_value(Package) - set_value(p, self.current_element) - - @grammar_rule("originator : PKG_ORIG error") - def p_pkg_originator_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageOriginator: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("supplier : PKG_SUPPL actor_or_no_assertion") - def p_pkg_supplier(self, p): - self.check_that_current_element_matches_class_for_value(Package) - set_value(p, self.current_element) - - @grammar_rule("supplier : PKG_SUPPL error") - def p_pkg_supplier_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageSupplier: Token did not match specified grammar rule. Line: {p.lineno(1)}") + if str(p.slice[0]) in self.current_element: + self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") + return + self.current_element[str(p.slice[0])] = p[2] in ['true', 'True'] @grammar_rule("pkg_file_name : PKG_FILE_NAME LINE") def p_pkg_file_name(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, argument_name="file_name") - @grammar_rule("pkg_file_name : PKG_FILE_NAME error") - def p_pkg_file_name_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageFileName: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("package_version : PKG_VERSION LINE") def p_package_version(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, argument_name="version") - @grammar_rule("package_version : PKG_VERSION error") - def p_package_version_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PackageVersion: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE primary_package_purpose_value") def p_primary_package_purpose(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")]) - @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error") - def p_primary_package_purpose_error(self, p): - self.current_element["logger"].append( - f"Error while parsing PrimaryPackagePurpose: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("primary_package_purpose_value : APPLICATION\n | FRAMEWORK\n | LIBRARY\n | CONTAINER\n " "| OPERATING_SYSTEM \n | DEVICE \n| FIRMWARE\n | SOURCE\n | ARCHIVE\n | FILE\n | INSTALL\n | OTHER") def p_primary_package_purpose_value(self, p): @@ -690,26 +445,21 @@ def p_primary_package_purpose_value(self, p): @grammar_rule("built_date : BUILT_DATE DATE\n release_date : RELEASE_DATE DATE\n " "valid_until_date : VALID_UNTIL_DATE DATE") - def p_built_date(self, p): + def p_package_dates(self, p): self.check_that_current_element_matches_class_for_value(Package) set_value(p, self.current_element, method_to_apply=datetime_from_str) - @grammar_rule("built_date : BUILT_DATE error") - def p_built_date_error(self, p): - self.current_element["logger"].append( - f"Error while parsing BuiltDate: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - - @grammar_rule("release_date : RELEASE_DATE error") - def p_release_date_error(self, p): - self.current_element["logger"].append( - f"Error while parsing ReleaseDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") + # parsing methods for snippet - @grammar_rule("valid_until_date : VALID_UNTIL_DATE error") - def p_valid_until_date_error(self, p): + @grammar_rule("snip_name : SNIPPET_NAME error\n snip_comment : SNIPPET_COMMENT error\n " + "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n snip_cr_text : SNIPPET_CR_TEXT error\n " + "snip_lic_comment : SNIPPET_LICS_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n " + "snip_lics_conc : SNIPPET_LICS_CONC error\n snip_lics_info : SNIPPET_LICS_INFO error\n " + "snip_byte_range : SNIPPET_BYTE_RANGE error\n snip_line_range : SNIPPET_LINE_RANGE error\n ") + def p_snippet_value_error(self, p): self.current_element["logger"].append( - f"Error while parsing ValidUntilDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") + f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - # parsing methods for snippet @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID LINE") def p_snippet_spdx_id(self, p): self.initialize_new_current_element(Snippet) @@ -719,83 +469,43 @@ def p_snippet_spdx_id(self, p): def p_snippet_spdx_id_error(self, p): self.initialize_new_current_element(Snippet) self.current_element["logger"].append( - f"Error while parsing SnippetSPDXID: Token did not match specified grammar rule. Line: {p.lineno(1)}") + f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("snip_name : SNIPPET_NAME LINE") def p_snippet_name(self, p): self.check_that_current_element_matches_class_for_value(Snippet) set_value(p, self.current_element, argument_name="name") - @grammar_rule("snip_name : SNIPPET_NAME error") - def p_snippet_name_error(self, p): - self.current_element["logger"].append( - f"Error while parsing SnippetName: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("snip_comment : SNIPPET_COMMENT text_or_line") def p_snippet_comment(self, p): self.check_that_current_element_matches_class_for_value(Snippet) set_value(p, self.current_element, argument_name="comment") - @grammar_rule("snip_comment : SNIPPET_COMMENT error") - def p_snippet_comment_error(self, p): - self.current_element["logger"].append( - f"Error while parsing SnippetComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line") def p_snippet_attribution_text(self, p): self.check_that_current_element_matches_class_for_value(Snippet) self.current_element.setdefault("attribution_texts", []).append(p[2]) - @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error") - def p_snippet_attribution_text_error(self, p): - self.current_element["logger"].append( - f"Error while parsing SnippetAttributionText: Token did not match specified grammar rule. 
" - f"Line: {p.lineno(1)}") - @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none") def p_snippet_copyright_text(self, p): self.check_that_current_element_matches_class_for_value(Snippet) set_value(p, self.current_element, argument_name="copyright_text") - @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT error") - def p_snippet_copyright_text_error(self, p): - self.current_element["logger"].append( - f"Error while parsing SnippetCopyrightText: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT text_or_line") def p_snippet_license_comment(self, p): self.check_that_current_element_matches_class_for_value(Snippet) set_value(p, self.current_element, argument_name="license_comment") - @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT error") - def p_snippet_license_comment_error(self, p): - self.current_element["logger"].append( - f"Error while parsing SnippetLicenseComments: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("file_spdx_id : SNIPPET_FILE_SPDXID LINE") def p_snippet_from_file_spdxid(self, p): self.check_that_current_element_matches_class_for_value(Snippet) set_value(p, self.current_element) - @grammar_rule("file_spdx_id : SNIPPET_FILE_SPDXID error") - def p_snippet_from_file_spdxid_error(self, p): - self.current_element["logger"].append( - f"Error while parsing SnippetFromFileSPDXID: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC license_or_no_assertion_or_none") def p_snippet_concluded_license(self, p): self.check_that_current_element_matches_class_for_value(Snippet) set_value(p, self.current_element, argument_name="license_concluded") - @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC error") - def p_snippet_concluded_license_error(self, p): - self.current_element["logger"].append( - f"Error while parsing SnippetLicenseConcluded: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO license_or_no_assertion_or_none") def p_snippet_license_info(self, p): self.check_that_current_element_matches_class_for_value(Snippet) @@ -804,12 +514,6 @@ def p_snippet_license_info(self, p): else: self.current_element.setdefault("license_info_in_snippet", []).append(p[2]) - @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO error") - def p_snippet_license_info_error(self, p): - self.current_element["logger"].append( - f"Error while parsing LicenseInfoInSnippet: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE") def p_snippet_byte_range(self, p): self.check_that_current_element_matches_class_for_value(Snippet) @@ -824,11 +528,6 @@ def p_snippet_byte_range(self, p): endpoint = int(p[2].split(":")[-1]) self.current_element["byte_range"] = startpoint, endpoint - @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE error") - def p_snippet_byte_range_error(self, p): - self.current_element["logger"].append( - f"Error while parsing SnippetByteRange: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE LINE") def p_snippet_line_range(self, p): self.check_that_current_element_matches_class_for_value(Snippet) @@ -844,12 +543,13 @@ def p_snippet_line_range(self, p): endpoint = int(p[2].split(":")[1]) self.current_element["line_range"] = startpoint, endpoint - @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE error") - def p_snippet_line_range_error(self, p): + # parsing methods for annotation + @grammar_rule("annotation_date : ANNOTATION_DATE error\n annotation_comment : ANNOTATION_COMMENT error\n " + "annotation_type : ANNOTATION_TYPE error\n annotation_spdx_id : ANNOTATION_SPDX_ID error") + def p_annotation_value_error(self, p): self.current_element["logger"].append( - f"Error while parsing SnippetLineRange: Token did not match specified grammar rule. Line: {p.lineno(1)}") + f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - # parsing methods for annotation def p_annotator(self, p): """annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE""" self.initialize_new_current_element(Annotation) @@ -866,31 +566,16 @@ def p_annotation_date(self, p): self.check_that_current_element_matches_class_for_value(Annotation) set_value(p, self.current_element, method_to_apply=datetime_from_str) - @grammar_rule("annotation_date : ANNOTATION_DATE error") - def p_annotation_date_error(self, p): - self.current_element["logger"].append( - f"Error while parsing AnnotationDate: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("annotation_comment : ANNOTATION_COMMENT text_or_line") def p_annotation_comment(self, p): self.check_that_current_element_matches_class_for_value(Annotation) set_value(p, self.current_element) - @grammar_rule("annotation_comment : ANNOTATION_COMMENT error") - def p_annotation_comment_error(self, p): - self.current_element["logger"].append( - f"Error while parsing AnnotationComment: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("annotation_type : ANNOTATION_TYPE annotation_type_value") def p_annotation_type(self, p): self.check_that_current_element_matches_class_for_value(Annotation) set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x]) - @grammar_rule("annotation_type : ANNOTATION_TYPE error") - def p_annotation_type_error(self, p): - self.current_element["logger"].append( - f"Error while parsing AnnotationType: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("annotation_type_value : OTHER\n| REVIEW") def p_annotation_type_value(self, p): p[0] = p[1] @@ -899,12 +584,6 @@ def p_annotation_type_value(self, p): def p_annotation_spdx_id(self, p): set_value(p, self.current_element, argument_name="spdx_id") - @grammar_rule("annotation_spdx_id : ANNOTATION_SPDX_ID error") - def p_annotation_spdx_id_error(self, p): - self.current_element["logger"].append( - f"Error while parsing SPDXREF in annotation: Token did not match specified grammar rule. " - f"Line: {p.lineno(1)}") - # parsing methods for relationship @grammar_rule("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP relationship_value") @@ -934,7 +613,7 @@ def p_relationship(self, p): def p_relationship_error(self, p): self.initialize_new_current_element(Relationship) self.current_element["logger"].append( - f"Error while parsing Relationship: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") + f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") @grammar_rule("relationship_value : DOC_REF_ID LINE") def p_relationship_value_with_doc_ref(self, p): diff --git a/tests/spdx/parser/tagvalue/test_file_parser.py b/tests/spdx/parser/tagvalue/test_file_parser.py index 1b37b2b5a..6ca32efc7 100644 --- a/tests/spdx/parser/tagvalue/test_file_parser.py +++ b/tests/spdx/parser/tagvalue/test_file_parser.py @@ -64,5 +64,5 @@ def test_parse_invalid_file(): parser.parse(file_str) assert err.value.get_messages() == [["Error while parsing File: ['Error while parsing FileType: Token did not " - "match any of the valid values. Line: 3', 'Error while parsing Checksum in " - "file: Token did not match specified grammar rule. Line: 5']"]] + "match specified grammar rule. Line: 3', 'Error while parsing FileChecksum: " + "Token did not match specified grammar rule. Line: 5']"]] From 71a16d6e25e0068cf3eda9048c34144885fd9295 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Thu, 2 Mar 2023 14:41:35 +0100 Subject: [PATCH 31/43] [issue-382] add tests for contains relationship Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 9 +++++-- .../parser/tagvalue/test_tag_value_parser.py | 26 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 9f8f78043..b17e99045 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -99,7 +99,8 @@ def p_attrib(self, p): pass # general parsing methods - @grammar_rule("unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG DATE\n | UNKNOWN_TAG PERSON_VALUE") + @grammar_rule("unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG DATE\n | UNKNOWN_TAG PERSON_VALUE \n" + "| UNKNOWN_TAG") def p_unknown_tag(self, p): self.logger.append(f"Unknown tag provided in line {p.lineno(1)}") @@ -681,7 +682,11 @@ def check_for_preceding_package_and_build_contains_relationship(self): file_spdx_id = self.current_element["spdx_id"] if "packages" not in self.elements_build: return + # We assume that all files that are not contained in a package precede any package information. Any file + # information that follows any package information is assigned to the last parsed package by creating a + # corresponding contains relationship. 
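A minimal sketch of the behaviour this comment describes, condensed from the test added further down in this patch (the SPDX IDs and file names below are invented for illustration): a file parsed before any package is left without a relationship, while a file parsed after a package is attached to that package via a CONTAINS relationship.

    from spdx.model.relationship import Relationship, RelationshipType
    from spdx.parser.tagvalue.parser.tagvalue import Parser
    from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR

    parser = Parser()
    document = parser.parse("\n".join([
        DOCUMENT_STR, "SPDXID: SPDXRef-DOCUMENT",
        "FileName: standalone file", "SPDXID: SPDXRef-standalone",
        "FileChecksum: SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759",
        "PackageName: some package", "SPDXID: SPDXRef-some-package",
        "PackageDownloadLocation: https://download.com",
        "FileName: contained file", "SPDXID: SPDXRef-contained",
        "FileChecksum: SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759"]))

    # only the file that follows the package is picked up by a CONTAINS relationship
    assert document.relationships == [
        Relationship("SPDXRef-some-package", RelationshipType.CONTAINS, "SPDXRef-contained")]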
+ # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2) package_spdx_id = self.elements_build["packages"][-1].spdx_id relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id) - if relationship not in self.elements_build["relationships"]: + if relationship not in self.elements_build.setdefault("relationships",[]): self.elements_build.setdefault("relationships", []).append(relationship) diff --git a/tests/spdx/parser/tagvalue/test_tag_value_parser.py b/tests/spdx/parser/tagvalue/test_tag_value_parser.py index e96fa9c80..9dde9dacf 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_parser.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_parser.py @@ -14,8 +14,10 @@ import pytest from spdx.model.document import Document +from spdx.model.relationship import RelationshipType, Relationship from spdx.parser.error import SPDXParsingError from spdx.parser.tagvalue.parser.tagvalue import Parser +from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR def test_parse_unknown_tag(): @@ -40,3 +42,27 @@ def test_tag_value_parser(): assert len(doc.snippets) == 1 assert len(doc.relationships) == 13 assert len(doc.extracted_licensing_info) == 5 + + +def test_building_contains_relationship(): + parser = Parser() + document_str = "\n".join( + [DOCUMENT_STR, "SPDXID: SPDXRef-DOCUMENT", "FileName: File without package", "SPDXID: SPDXRef-File", + "FileChecksum: SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + "PackageName: Package with two files", "SPDXID: SPDXRef-Package-with-two-files", + "PackageDownloadLocation: https://download.com", + "FileName: File in package", "SPDXID: SPDXRef-File-in-Package", + "FileChecksum: SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + "FileName: Second file in package", "SPDXID: SPDXRef-Second-File-in-Package", + "FileChecksum: SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + "PackageName: Second package with file", "SPDXID: SPDXRef-Package-with-one-file", + "PackageDownloadLocation: https://download.com", + "FileName: File in package", "SPDXID: SPDXRef-File-in-different-Package", + "FileChecksum: SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + ]) + document = parser.parse(document_str) + + assert document.relationships == [ + Relationship("SPDXRef-Package-with-two-files", RelationshipType.CONTAINS, "SPDXRef-File-in-Package"), + Relationship("SPDXRef-Package-with-two-files", RelationshipType.CONTAINS, "SPDXRef-Second-File-in-Package"), + Relationship("SPDXRef-Package-with-one-file", RelationshipType.CONTAINS, "SPDXRef-File-in-different-Package")] From 11d6d41ea702ed357195433c4fc6d916dbcff158 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Thu, 2 Mar 2023 14:41:50 +0100 Subject: [PATCH 32/43] [issue-382] reformat Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 34 +++++++++++---------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index b17e99045..f639cb6d7 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -545,6 +545,7 @@ def p_snippet_line_range(self, p): self.current_element["line_range"] = startpoint, endpoint # parsing methods for annotation + @grammar_rule("annotation_date : ANNOTATION_DATE error\n annotation_comment : ANNOTATION_COMMENT error\n " "annotation_type : ANNOTATION_TYPE error\n annotation_spdx_id : ANNOTATION_SPDX_ID error") def p_annotation_value_error(self, p): @@ -586,6 
+587,7 @@ def p_annotation_spdx_id(self, p): set_value(p, self.current_element, argument_name="spdx_id") # parsing methods for relationship + @grammar_rule("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n " "| RELATIONSHIP relationship_value") def p_relationship(self, p): @@ -642,6 +644,22 @@ def parse(self, text): document = construct_or_raise_parsing_error(Document, self.elements_build) return document + def initialize_new_current_element(self, class_name: Any): + if "class" in self.current_element and "spdx_id" in self.current_element: + self.element_stack.append({self.current_element["class"]: self.current_element["spdx_id"]}) + self.construct_current_element() + self.current_element["class"] = class_name + + def check_that_current_element_matches_class_for_value(self, expected_class): + if "class" not in self.current_element: + self.logger.append( + f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " + f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing.") + elif expected_class != self.current_element["class"]: + self.logger.append( + f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " + f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing.") + def construct_current_element(self): if "class" not in self.current_element: self.current_element = {"logger": Logger()} @@ -662,22 +680,6 @@ def construct_current_element(self): self.logger.append(err.get_messages()) self.current_element = {"logger": Logger()} - def check_that_current_element_matches_class_for_value(self, expected_class): - if "class" not in self.current_element: - self.logger.append( - f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " - f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing.") - elif expected_class != self.current_element["class"]: - self.logger.append( - f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " - f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing.") - - def initialize_new_current_element(self, class_name: Any): - if "class" in self.current_element and "spdx_id" in self.current_element: - self.element_stack.append({self.current_element["class"]: self.current_element["spdx_id"]}) - self.construct_current_element() - self.current_element["class"] = class_name - def check_for_preceding_package_and_build_contains_relationship(self): file_spdx_id = self.current_element["spdx_id"] if "packages" not in self.elements_build: From 66399f8366c530b9a76d62fc43422956b6796ee8 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Thu, 2 Mar 2023 16:13:24 +0100 Subject: [PATCH 33/43] [issue-382] use tag-value parser to test tag-value writer Signed-off-by: Meret Behrens --- tests/spdx/writer/tagvalue/test_tagvalue_writer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/spdx/writer/tagvalue/test_tagvalue_writer.py b/tests/spdx/writer/tagvalue/test_tagvalue_writer.py index 556b724ac..f4af5a505 100644 --- a/tests/spdx/writer/tagvalue/test_tagvalue_writer.py +++ b/tests/spdx/writer/tagvalue/test_tagvalue_writer.py @@ -13,6 +13,7 @@ import pytest +from spdx.parser.tagvalue.parser import tagvalue_parser from tests.spdx.fixtures import document_fixture from spdx.writer.tagvalue.tagvalue_writer import 
write_document_to_file @@ -29,5 +30,6 @@ def test_write_tag_value(temporary_file_path: str): write_document_to_file(document, temporary_file_path) - # without a tag-value parser we can only test that no errors occur while writing - # as soon as the tag-value parser is implemented (https://github.com/spdx/tools-python/issues/382) we can test for equality between the temporary file and the expected file in ./expected_results + parsed_document = tagvalue_parser.parse_from_file(temporary_file_path) + + assert parsed_document == document From 3fbe4d34d570feb576598d6d777bf78242d15dc0 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Thu, 2 Mar 2023 16:38:32 +0100 Subject: [PATCH 34/43] [issue-382] fix logging Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 6 ++--- .../parser/tagvalue/test_annotation_parser.py | 20 +++++++------- .../tagvalue/test_creation_info_parser.py | 16 ++++++------ .../spdx/parser/tagvalue/test_file_parser.py | 6 ++--- .../parser/tagvalue/test_package_parser.py | 26 +++++++++---------- .../tagvalue/test_relationship_parser.py | 10 +++---- .../parser/tagvalue/test_snippet_parser.py | 12 ++++----- 7 files changed, 48 insertions(+), 48 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index f639cb6d7..c2f655f0f 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -637,7 +637,7 @@ def parse(self, text): try: raise_parsing_error_if_logger_has_messages(self.creation_info.pop("logger"), "CreationInfo") except SPDXParsingError as err: - self.logger.append(err.get_messages()) + self.logger.extend(err.get_messages()) raise_parsing_error_if_logger_has_messages(self.logger) creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info) self.elements_build["creation_info"] = creation_info @@ -668,7 +668,7 @@ def construct_current_element(self): try: raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), class_name.__name__) except SPDXParsingError as err: - self.logger.append(err.get_messages()) + self.logger.extend(err.get_messages()) self.current_element = {"logger": Logger()} return try: @@ -677,7 +677,7 @@ def construct_current_element(self): if class_name == File: self.check_for_preceding_package_and_build_contains_relationship() except SPDXParsingError as err: - self.logger.append(err.get_messages()) + self.logger.extend(err.get_messages()) self.current_element = {"logger": Logger()} def check_for_preceding_package_and_build_contains_relationship(self): diff --git a/tests/spdx/parser/tagvalue/test_annotation_parser.py b/tests/spdx/parser/tagvalue/test_annotation_parser.py index e0b7b515d..3cf7fa3a0 100644 --- a/tests/spdx/parser/tagvalue/test_annotation_parser.py +++ b/tests/spdx/parser/tagvalue/test_annotation_parser.py @@ -39,19 +39,19 @@ def test_parse_annotation(): @pytest.mark.parametrize("annotation_str, expected_message", [ - ('Annotator: Person: Jane Doe()', [['Error while constructing Annotation: Annotation.__init__() missing 4 ' - "required positional arguments: 'spdx_id', 'annotation_type', " - "'annotation_date', and 'annotation_comment'"]]), + ('Annotator: Person: Jane Doe()', ['Error while constructing Annotation: Annotation.__init__() missing 4 ' + "required positional arguments: 'spdx_id', 'annotation_type', " + "'annotation_date', and 'annotation_comment'"]), ('Annotator: Person: Jane Doe()\nAnnotationType: SOURCE\nAnnotationDate: 201001-2912:23', - [["Error while 
parsing Annotation: ['Error while parsing AnnotationType: Token " - "did not match specified grammar rule. Line: 2', 'Error while parsing " - "AnnotationDate: Token did not match specified grammar rule. Line: 3']"]]), + ["Error while parsing Annotation: ['Error while parsing AnnotationType: Token " + "did not match specified grammar rule. Line: 2', 'Error while parsing " + "AnnotationDate: Token did not match specified grammar rule. Line: 3']"]), ('Annotator: Jane Doe()\nAnnotationDate: 201001-29T18:30:22Z\n' 'AnnotationComment: Document level annotation\nAnnotationType: OTHER\nSPDXREF: SPDXRef-DOCUMENT', - [["Error while parsing Annotation: ['Error while parsing Annotator: Token did " - "not match specified grammar rule. Line: 1', 'Error while parsing " - "AnnotationDate: Token did not match specified grammar rule. Line: 2']"]]), - ('Annotator: Person: ()', [["Error while parsing Annotation: [['No name for Person provided: Person: ().']]"]]), + ["Error while parsing Annotation: ['Error while parsing Annotator: Token did " + "not match specified grammar rule. Line: 1', 'Error while parsing " + "AnnotationDate: Token did not match specified grammar rule. Line: 2']"]), + ('Annotator: Person: ()', ["Error while parsing Annotation: [['No name for Person provided: Person: ().']]"]), ('AnnotationType: REVIEW', ['Element Annotation is not the current element in scope, probably the ' 'expected tag to start the element (Annotator) is missing.'])]) def test_parse_invalid_annotation(annotation_str, expected_message): diff --git a/tests/spdx/parser/tagvalue/test_creation_info_parser.py b/tests/spdx/parser/tagvalue/test_creation_info_parser.py index de5a794c3..f871e304c 100644 --- a/tests/spdx/parser/tagvalue/test_creation_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_creation_info_parser.py @@ -70,12 +70,12 @@ def test_parse_creation_info(): 'Creator: Person Bob (bob@example.com)', 'Creator: Organization: Acme [email]', 'Created: 2010-02-03T00:00:0Z', 'CreatorComment: Sample Comment', 'LicenseListVersion: 7']), - [["Error while parsing CreationInfo: " - "['Error while parsing DocumentNamespace: Token did not match specified grammar rule. " - "Line: 6', 'Error while parsing ExternalDocumentRef: " - "Token did not match specified grammar rule. Line: 7', 'Error while parsing Creator: " - "Token did not match specified grammar rule. Line: 8', 'Error while parsing Created: " - "Token did not match specified grammar rule. Line: 10', '7 is not a valid version string']"]]), + ["Error while parsing CreationInfo: " + "['Error while parsing DocumentNamespace: Token did not match specified grammar rule. " + "Line: 6', 'Error while parsing ExternalDocumentRef: " + "Token did not match specified grammar rule. Line: 7', 'Error while parsing Creator: " + "Token did not match specified grammar rule. Line: 8', 'Error while parsing Created: " + "Token did not match specified grammar rule. Line: 10', '7 is not a valid version string']"]), ('\n'.join( ['SPDXVersion: SPDX-2.3', 'DataLicense: CC0-1.0', 'DocumentName: Sample_Document-V2.3', 'SPDXID: SPDXRef-DOCUMENT']), @@ -83,8 +83,8 @@ def test_parse_creation_info(): "required positional arguments: 'document_namespace', 'creators', and " "'created'"]), ('LicenseListVersion: 3.5\nLicenseListVersion: 3.7', - [["Error while parsing CreationInfo: ['Multiple values for LicenseListVersion " - "found. Line: 2']"]])])) + ["Error while parsing CreationInfo: ['Multiple values for LicenseListVersion " + "found. 
Line: 2']"])])) def test_parse_invalid_creation_info(document_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: diff --git a/tests/spdx/parser/tagvalue/test_file_parser.py b/tests/spdx/parser/tagvalue/test_file_parser.py index 6ca32efc7..7ca5c4118 100644 --- a/tests/spdx/parser/tagvalue/test_file_parser.py +++ b/tests/spdx/parser/tagvalue/test_file_parser.py @@ -63,6 +63,6 @@ def test_parse_invalid_file(): with pytest.raises(SPDXParsingError) as err: parser.parse(file_str) - assert err.value.get_messages() == [["Error while parsing File: ['Error while parsing FileType: Token did not " - "match specified grammar rule. Line: 3', 'Error while parsing FileChecksum: " - "Token did not match specified grammar rule. Line: 5']"]] + assert err.value.get_messages() == ["Error while parsing File: ['Error while parsing FileType: Token did not " + "match specified grammar rule. Line: 3', 'Error while parsing FileChecksum: " + "Token did not match specified grammar rule. Line: 5']"] diff --git a/tests/spdx/parser/tagvalue/test_package_parser.py b/tests/spdx/parser/tagvalue/test_package_parser.py index 9f2de8997..a65650358 100644 --- a/tests/spdx/parser/tagvalue/test_package_parser.py +++ b/tests/spdx/parser/tagvalue/test_package_parser.py @@ -79,26 +79,26 @@ def test_parse_package(): ['Element Package is not the current element in scope, probably the expected ' 'tag to start the element (PackageName) is missing.']), ('PackageName: TestPackage', - [['Error while constructing Package: Package.__init__() missing 2 required ' - "positional arguments: 'spdx_id' and 'download_location'"]]), + ['Error while constructing Package: Package.__init__() missing 2 required ' + "positional arguments: 'spdx_id' and 'download_location'"]), ('PackageName: TestPackage\nPackageCopyrightText:This is a copyright\n' 'PackageCopyrightText:MultipleCopyright', - [["Error while parsing Package: ['Multiple values for PackageCopyrightText " - "found. Line: 3']"]]), + ["Error while parsing Package: ['Multiple values for PackageCopyrightText " + "found. Line: 3']"]), ('PackageName: TestPackage\nExternalRef: reference locator', - [['Error while parsing Package: ["Couldn\'t split PackageExternalRef in ' - 'category, reference_type and locator. Line: 2"]']]), + ['Error while parsing Package: ["Couldn\'t split PackageExternalRef in ' + 'category, reference_type and locator. Line: 2"]']), ('PackageName: TestPackage\nExternalRef: category reference locator', - [["Error while parsing Package: ['Invalid ExternalPackageRefCategory: " - "category']"]]), + ["Error while parsing Package: ['Invalid ExternalPackageRefCategory: " + "category']"]), ('SPDXID:SPDXRef-DOCUMENT\nPackageName: TestPackage\nSPDXID:SPDXRef-Package\n' 'PackageDownloadLocation: download.com\nPackageVerificationCode: category reference locator', - [["Error while parsing Package: ['Error while parsing PackageVerificationCode: " - "Value did not match expected format. Line: 5']"]]), + ["Error while parsing Package: ['Error while parsing PackageVerificationCode: " + "Value did not match expected format. Line: 5']"]), ('PackageName: TestPackage\nBuiltDate: 2012\nValidUntilDate:202-11-02T00:00', - [["Error while parsing Package: ['Error while parsing BuiltDate: Token did not " - "match specified grammar rule. Line: 2', 'Error while parsing " - "ValidUntilDate: Token did not match specified grammar rule. Line: 3']"]]) + ["Error while parsing Package: ['Error while parsing BuiltDate: Token did not " + "match specified grammar rule. 
Line: 2', 'Error while parsing " + "ValidUntilDate: Token did not match specified grammar rule. Line: 3']"]) ]) def test_parse_invalid_package(package_str, expected_message): parser = Parser() diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py index f6776299b..dc93c1dc4 100644 --- a/tests/spdx/parser/tagvalue/test_relationship_parser.py +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -42,13 +42,13 @@ def test_parse_relationship(relationship_str, expected_relationship): @pytest.mark.parametrize("relationship_str, expected_message", [("Relationship: spdx_id DESCRIBES", - [['Error while parsing Relationship: ["Relationship couldn\'t be split in spdx_element_id, ' - 'relationship_type and related_spdx_element. Line: 1"]']]), + ['Error while parsing Relationship: ["Relationship couldn\'t be split in spdx_element_id, ' + 'relationship_type and related_spdx_element. Line: 1"]']), ("Relationship: spdx_id IS spdx_id", - [["Error while parsing Relationship: ['Invalid RelationshipType IS. Line: 1']"]]), + ["Error while parsing Relationship: ['Invalid RelationshipType IS. Line: 1']"]), ("Relationship: spdx_id IS spdx_id\nRelationshipComment: SOURCE", - [["Error while parsing Relationship: ['Error while parsing Relationship: Token " - "did not match specified grammar rule. Line: 1']"]]) + ["Error while parsing Relationship: ['Error while parsing Relationship: Token " + "did not match specified grammar rule. Line: 1']"]) ]) def test_parse_invalid_relationship(relationship_str, expected_message): parser = Parser() diff --git a/tests/spdx/parser/tagvalue/test_snippet_parser.py b/tests/spdx/parser/tagvalue/test_snippet_parser.py index a6f84d415..f4c62c68f 100644 --- a/tests/spdx/parser/tagvalue/test_snippet_parser.py +++ b/tests/spdx/parser/tagvalue/test_snippet_parser.py @@ -60,13 +60,13 @@ def test_parse_snippet(): ('SnippetName: TestSnippet', ['Element Snippet is not the current element in scope, probably the expected ' 'tag to start the element (SnippetSPDXID) is missing.']), ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1,4', - [['Error while parsing Snippet: ["Value for SnippetByteRange doesn\'t match ' - 'valid range pattern."]']]), + ['Error while parsing Snippet: ["Value for SnippetByteRange doesn\'t match ' + 'valid range pattern."]']), ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1:4\nSnippetByteRange:10:23', - [["Error while parsing Snippet: ['Multiple values for SnippetByteRange found. " - "Line: 3']"]]), - ('SnippetSPDXID: SPDXRef-Snippet', [['Error while constructing Snippet: Snippet.__init__() missing 2 required ' - "positional arguments: 'file_spdx_id' and 'byte_range'"]]) + ["Error while parsing Snippet: ['Multiple values for SnippetByteRange found. 
" + "Line: 3']"]), + ('SnippetSPDXID: SPDXRef-Snippet', ['Error while constructing Snippet: Snippet.__init__() missing 2 required ' + "positional arguments: 'file_spdx_id' and 'byte_range'"]) ]) def test_parse_invalid_snippet(snippet_str, expected_message): parser = Parser() From 1a6f7448af937f74c751972c728492aef9db1eee Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Fri, 3 Mar 2023 09:14:48 +0100 Subject: [PATCH 35/43] [issue-382] delete element_stack as it is not needed Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index c2f655f0f..c5b5204b1 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -46,7 +46,6 @@ class Parser(object): tokens: List[str] logger: Logger - element_stack: List[Dict[str, str]] current_element: Dict[str, Any] creation_info: Dict[str, Any] elements_build: Dict[str, Any] @@ -56,7 +55,6 @@ class Parser(object): def __init__(self, **kwargs): self.tokens = SPDXLexer.tokens self.logger = Logger() - self.element_stack = [] self.current_element = {"logger": Logger()} self.creation_info = {"logger": Logger()} self.elements_build = dict() @@ -645,8 +643,6 @@ def parse(self, text): return document def initialize_new_current_element(self, class_name: Any): - if "class" in self.current_element and "spdx_id" in self.current_element: - self.element_stack.append({self.current_element["class"]: self.current_element["spdx_id"]}) self.construct_current_element() self.current_element["class"] = class_name From 6d06e60435b12e6babfdb21da8e9f3455226d02e Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Fri, 3 Mar 2023 09:33:04 +0100 Subject: [PATCH 36/43] [issue-382] add line number to error messages Signed-off-by: Meret Behrens --- .../parser/tagvalue/parser/helper_methods.py | 4 +- src/spdx/parser/tagvalue/parser/tagvalue.py | 106 +++++++++--------- .../parser/tagvalue/test_annotation_parser.py | 2 +- .../test_extracted_licensing_info_parser.py | 10 +- .../parser/tagvalue/test_package_parser.py | 2 +- .../parser/tagvalue/test_snippet_parser.py | 4 +- 6 files changed, 66 insertions(+), 62 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/helper_methods.py b/src/spdx/parser/tagvalue/parser/helper_methods.py index c47e5b5c0..869aa21e5 100644 --- a/src/spdx/parser/tagvalue/parser/helper_methods.py +++ b/src/spdx/parser/tagvalue/parser/helper_methods.py @@ -37,12 +37,12 @@ def str_from_text(text: Optional[str]) -> Optional[str]: return None -def parse_checksum(logger: Logger, checksum_str: str) -> Optional[Checksum]: +def parse_checksum(logger: Logger, checksum_str: str, line_number: int) -> Optional[Checksum]: try: algorithm, value = checksum_str.split(":") except ValueError: logger.append( - f"Couldn't split value for checksum in algorithm and value.") + f"Couldn't split value for checksum in algorithm and value. 
Line: {line_number}") return None algorithm = ChecksumAlgorithm[algorithm.upper().replace("-", "_")] value = value.strip() diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index c5b5204b1..1c9cb4512 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -164,7 +164,7 @@ def p_doc_name(self, p): def p_external_document_ref(self, p): document_ref_id = p[2] document_uri = p[3] - checksum = parse_checksum(self.creation_info["logger"], p[4]) + checksum = parse_checksum(self.creation_info["logger"], p[4], p.lineno(1)) external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum) self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) @@ -186,7 +186,7 @@ def p_extracted_licensing_info_value_error(self, p): @grammar_rule("license_name : LICS_NAME line_or_no_assertion\n extracted_text : LICS_TEXT text_or_line") def p_generic_value_extracted_licensing_info(self, p): - self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) + self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)) set_value(p, self.current_element) @grammar_rule("license_id : LICS_ID LINE") @@ -196,12 +196,12 @@ def p_extracted_license_id(self, p): @grammar_rule("lic_xref : LICS_CRS_REF LINE") def p_extracted_cross_reference(self, p): - self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) + self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)) self.current_element.setdefault("cross_references", []).append(p[2]) @grammar_rule("lic_comment : LICS_COMMENT text_or_line") def p_license_comment(self, p): - self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo) + self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)) set_value(p, self.current_element, argument_name="comment") # parsing methods for file @@ -228,32 +228,32 @@ def p_file_name_error(self, p): @grammar_rule("file_contrib : FILE_CONTRIB LINE") def p_file_contributor(self, p): - self.check_that_current_element_matches_class_for_value(File) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) self.current_element.setdefault("contributors", []).append(p[2]) @grammar_rule("file_notice : FILE_NOTICE text_or_line") def p_file_notice(self, p): - self.check_that_current_element_matches_class_for_value(File) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) set_value(p, self.current_element, argument_name="notice") @grammar_rule("file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none") def p_file_copyright_text(self, p): - self.check_that_current_element_matches_class_for_value(File) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) set_value(p, self.current_element, argument_name="copyright_text") @grammar_rule("file_lics_comment : FILE_LICS_COMMENT text_or_line") def p_file_license_comment(self, p): - self.check_that_current_element_matches_class_for_value(File) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) set_value(p, self.current_element, argument_name="license_comment") @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line") def p_file_attribution_text(self, p): - self.check_that_current_element_matches_class_for_value(File) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) 
self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("file_lics_info : FILE_LICS_INFO license_or_no_assertion_or_none") def p_file_license_info(self, p): - self.check_that_current_element_matches_class_for_value(File) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_in_file"] = p[2] return @@ -261,12 +261,12 @@ def p_file_license_info(self, p): @grammar_rule("file_comment : FILE_COMMENT text_or_line") def p_file_comment(self, p): - self.check_that_current_element_matches_class_for_value(File) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) set_value(p, self.current_element, argument_name="comment") @grammar_rule("file_type : FILE_TYPE file_type_value") def p_file_type(self, p): - self.check_that_current_element_matches_class_for_value(File) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) self.current_element.setdefault("file_type", []).append(FileType[p[2]]) @grammar_rule( @@ -277,13 +277,13 @@ def p_file_type_value(self, p): @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") def p_file_checksum(self, p): - self.check_that_current_element_matches_class_for_value(File) - checksum = parse_checksum(self.current_element["logger"], p[2]) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) + checksum = parse_checksum(self.current_element["logger"], p[2], p.lineno(1)) self.current_element.setdefault("checksums", []).append(checksum) @grammar_rule("file_conc : FILE_LICS_CONC license_or_no_assertion_or_none") def p_file_license_concluded(self, p): - self.check_that_current_element_matches_class_for_value(File) + self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) set_value(p, self.current_element, argument_name="license_concluded") # parsing methods for package @@ -308,7 +308,7 @@ def p_package_value_error(self, p): "download_location : PKG_DOWN line_or_no_assertion_or_none\n " "originator : PKG_ORIG actor_or_no_assertion\n supplier : PKG_SUPPL actor_or_no_assertion") def p_generic_package_value(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element) @grammar_rule("package_name : PKG_NAME LINE") @@ -324,22 +324,22 @@ def p_package_name_error(self, p): @grammar_rule("pkg_comment : PKG_COMMENT text_or_line") def p_pkg_comment(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element, argument_name="comment") @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line") def p_pkg_attribution_text(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none") def p_pkg_copyright_text(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element, argument_name="copyright_text") @grammar_rule("pkg_ext_ref : PKG_EXT_REF LINE PKG_EXT_REF_COMMENT text_or_line\n | PKG_EXT_REF LINE") def p_pkg_external_refs(self, p): - 
self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) try: category, reference_type, locator = p[2].split(" ") except ValueError: @@ -367,17 +367,17 @@ def p_pkg_external_refs(self, p): @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT text_or_line") def p_pkg_license_comment(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element, argument_name="license_comment") @grammar_rule("pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none") def p_pkg_license_declared(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element, argument_name="license_declared") @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE license_or_no_assertion_or_none") def p_pkg_license_info_from_file(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_from_files"] = p[2] else: @@ -385,18 +385,18 @@ def p_pkg_license_info_from_file(self, p): @grammar_rule("pkg_lic_conc : PKG_LICS_CONC license_or_no_assertion_or_none") def p_pkg_license_concluded(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element, argument_name="license_concluded") @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") def p_pkg_checksum(self, p): - self.check_that_current_element_matches_class_for_value(Package) - checksum = parse_checksum(self.current_element["logger"], p[2]) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) + checksum = parse_checksum(self.current_element["logger"], p[2], p.lineno(1)) self.current_element.setdefault("checksums", []).append(checksum) @grammar_rule("verification_code : PKG_VERF_CODE LINE") def p_pkg_verification_code(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) if str(p.slice[0]) in self.current_element: self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") return @@ -416,7 +416,7 @@ def p_pkg_verification_code(self, p): @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE") def p_pkg_files_analyzed(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) if str(p.slice[0]) in self.current_element: self.current_element["logger"].append(f"Multiple values for {p[1]} found. 
Line: {p.lineno(1)}") return @@ -424,17 +424,17 @@ def p_pkg_files_analyzed(self, p): @grammar_rule("pkg_file_name : PKG_FILE_NAME LINE") def p_pkg_file_name(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element, argument_name="file_name") @grammar_rule("package_version : PKG_VERSION LINE") def p_package_version(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element, argument_name="version") @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE primary_package_purpose_value") def p_primary_package_purpose(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")]) @grammar_rule("primary_package_purpose_value : APPLICATION\n | FRAMEWORK\n | LIBRARY\n | CONTAINER\n " @@ -445,7 +445,7 @@ def p_primary_package_purpose_value(self, p): @grammar_rule("built_date : BUILT_DATE DATE\n release_date : RELEASE_DATE DATE\n " "valid_until_date : VALID_UNTIL_DATE DATE") def p_package_dates(self, p): - self.check_that_current_element_matches_class_for_value(Package) + self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) set_value(p, self.current_element, method_to_apply=datetime_from_str) # parsing methods for snippet @@ -472,42 +472,42 @@ def p_snippet_spdx_id_error(self, p): @grammar_rule("snip_name : SNIPPET_NAME LINE") def p_snippet_name(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) set_value(p, self.current_element, argument_name="name") @grammar_rule("snip_comment : SNIPPET_COMMENT text_or_line") def p_snippet_comment(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) set_value(p, self.current_element, argument_name="comment") @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line") def p_snippet_attribution_text(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none") def p_snippet_copyright_text(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) set_value(p, self.current_element, argument_name="copyright_text") @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT text_or_line") def p_snippet_license_comment(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) set_value(p, self.current_element, argument_name="license_comment") @grammar_rule("file_spdx_id : SNIPPET_FILE_SPDXID LINE") def p_snippet_from_file_spdxid(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) set_value(p, self.current_element) 
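The line numbers introduced in this patch come directly from PLY's token tracking. As a stand-alone sketch (independent of the SPDX code base, all names invented), this is how p[2] and p.lineno(1) behave inside a grammar rule: lineno(1) reports the line on which the first symbol of the rule was lexed, which is the value the parser's error messages interpolate after "Line:".

    from ply import lex, yacc

    tokens = ("TAG", "LINE")

    def t_TAG(t):
        r"FileName"
        return t

    def t_LINE(t):
        r":.+"
        t.value = t.value[1:].strip()
        return t

    def t_newline(t):
        r"\n+"
        t.lexer.lineno += len(t.value)

    def t_error(t):
        t.lexer.skip(1)

    def p_file_name(p):
        "file_name : TAG LINE"
        # symbol 1 is the TAG terminal, so lineno(1) is the line the tag was found on
        print(f"FileName {p[2]!r} found on line {p.lineno(1)}")

    def p_error(p):
        pass

    lexer = lex.lex()
    parser = yacc.yacc()
    parser.parse("\nFileName: ./src/main.py", lexer=lexer)  # FileName './src/main.py' found on line 2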
@grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC license_or_no_assertion_or_none") def p_snippet_concluded_license(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) set_value(p, self.current_element, argument_name="license_concluded") @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO license_or_no_assertion_or_none") def p_snippet_license_info(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_in_snippet"] = p[2] else: @@ -515,13 +515,14 @@ def p_snippet_license_info(self, p): @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE") def p_snippet_byte_range(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) if "byte_range" in self.current_element: self.current_element["logger"].append( f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) if not range_re.match(p[2].strip()): - self.current_element["logger"].append("Value for SnippetByteRange doesn't match valid range pattern.") + self.current_element["logger"].append(f"Value for SnippetByteRange doesn't match valid range pattern. " + f"Line: {p.lineno(1)}") return startpoint = int(p[2].split(":")[0]) endpoint = int(p[2].split(":")[-1]) @@ -529,14 +530,15 @@ def p_snippet_byte_range(self, p): @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE LINE") def p_snippet_line_range(self, p): - self.check_that_current_element_matches_class_for_value(Snippet) + self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) if "line_range" in self.current_element: self.current_element["logger"].append( f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") return range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) if not range_re.match(p[2].strip()): - self.current_element["logger"].append("Value for SnippetLineRange doesn't match valid range pattern.") + self.current_element["logger"].append(f"Value for SnippetLineRange doesn't match valid range pattern. 
" + f"Line: {p.lineno(1)}") return startpoint = int(p[2].split(":")[0]) endpoint = int(p[2].split(":")[1]) @@ -563,17 +565,17 @@ def p_annotator_error(self, p): @grammar_rule("annotation_date : ANNOTATION_DATE DATE") def p_annotation_date(self, p): - self.check_that_current_element_matches_class_for_value(Annotation) + self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)) set_value(p, self.current_element, method_to_apply=datetime_from_str) @grammar_rule("annotation_comment : ANNOTATION_COMMENT text_or_line") def p_annotation_comment(self, p): - self.check_that_current_element_matches_class_for_value(Annotation) + self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)) set_value(p, self.current_element) @grammar_rule("annotation_type : ANNOTATION_TYPE annotation_type_value") def p_annotation_type(self, p): - self.check_that_current_element_matches_class_for_value(Annotation) + self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)) set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x]) @grammar_rule("annotation_type_value : OTHER\n| REVIEW") @@ -646,15 +648,17 @@ def initialize_new_current_element(self, class_name: Any): self.construct_current_element() self.current_element["class"] = class_name - def check_that_current_element_matches_class_for_value(self, expected_class): + def check_that_current_element_matches_class_for_value(self, expected_class, line_number): if "class" not in self.current_element: self.logger.append( f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " - f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing.") + f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. " + f"Line: {line_number}") elif expected_class != self.current_element["class"]: self.logger.append( f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " - f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing.") + f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. " + f"Line: {line_number}") def construct_current_element(self): if "class" not in self.current_element: @@ -686,5 +690,5 @@ def check_for_preceding_package_and_build_contains_relationship(self): # (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2) package_spdx_id = self.elements_build["packages"][-1].spdx_id relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id) - if relationship not in self.elements_build.setdefault("relationships",[]): + if relationship not in self.elements_build.setdefault("relationships", []): self.elements_build.setdefault("relationships", []).append(relationship) diff --git a/tests/spdx/parser/tagvalue/test_annotation_parser.py b/tests/spdx/parser/tagvalue/test_annotation_parser.py index 3cf7fa3a0..7e99bffcd 100644 --- a/tests/spdx/parser/tagvalue/test_annotation_parser.py +++ b/tests/spdx/parser/tagvalue/test_annotation_parser.py @@ -53,7 +53,7 @@ def test_parse_annotation(): "AnnotationDate: Token did not match specified grammar rule. 
Line: 2']"]), ('Annotator: Person: ()', ["Error while parsing Annotation: [['No name for Person provided: Person: ().']]"]), ('AnnotationType: REVIEW', ['Element Annotation is not the current element in scope, probably the ' - 'expected tag to start the element (Annotator) is missing.'])]) + 'expected tag to start the element (Annotator) is missing. Line: 1'])]) def test_parse_invalid_annotation(annotation_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: diff --git a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py index ffffcafb5..a8c1c2f66 100644 --- a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py @@ -46,7 +46,7 @@ def test_parse_extracted_licensing_info(): def test_parse_invalid_extracted_licensing_info(): parser = Parser() extracted_licensing_info_str = '\n'.join([ - 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp', 'LicenseName: Beer-Ware License (Version 42)', 'LicenseCrossReference: http://people.freebsd.org/~phk/', 'LicenseComment: The beerware license has a couple of other standard variants.']) @@ -55,10 +55,10 @@ def test_parse_invalid_extracted_licensing_info(): parser.parse(extracted_licensing_info_str) assert err.value.get_messages() == ['Element ExtractedLicensingInfo is not the current element in scope, probably ' - 'the expected tag to start the element (LicenseID) is missing.', + 'the expected tag to start the element (LicenseID) is missing. Line: 1', 'Element ExtractedLicensingInfo is not the current element in scope, probably ' - 'the expected tag to start the element (LicenseID) is missing.', + 'the expected tag to start the element (LicenseID) is missing. Line: 2', 'Element ExtractedLicensingInfo is not the current element in scope, probably ' - 'the expected tag to start the element (LicenseID) is missing.', + 'the expected tag to start the element (LicenseID) is missing. Line: 3', 'Element ExtractedLicensingInfo is not the current element in scope, probably ' - 'the expected tag to start the element (LicenseID) is missing.'] + 'the expected tag to start the element (LicenseID) is missing. Line: 4'] diff --git a/tests/spdx/parser/tagvalue/test_package_parser.py b/tests/spdx/parser/tagvalue/test_package_parser.py index a65650358..0ecf9a5e5 100644 --- a/tests/spdx/parser/tagvalue/test_package_parser.py +++ b/tests/spdx/parser/tagvalue/test_package_parser.py @@ -77,7 +77,7 @@ def test_parse_package(): @pytest.mark.parametrize("package_str, expected_message", [('PackageDownloadLocation: SPDXRef-Package', ['Element Package is not the current element in scope, probably the expected ' - 'tag to start the element (PackageName) is missing.']), + 'tag to start the element (PackageName) is missing. 
Line: 1']), ('PackageName: TestPackage', ['Error while constructing Package: Package.__init__() missing 2 required ' "positional arguments: 'spdx_id' and 'download_location'"]), diff --git a/tests/spdx/parser/tagvalue/test_snippet_parser.py b/tests/spdx/parser/tagvalue/test_snippet_parser.py index f4c62c68f..0dd981fe3 100644 --- a/tests/spdx/parser/tagvalue/test_snippet_parser.py +++ b/tests/spdx/parser/tagvalue/test_snippet_parser.py @@ -58,10 +58,10 @@ def test_parse_snippet(): @pytest.mark.parametrize("snippet_str, expected_message", [ ('SnippetName: TestSnippet', ['Element Snippet is not the current element in scope, probably the expected ' - 'tag to start the element (SnippetSPDXID) is missing.']), + 'tag to start the element (SnippetSPDXID) is missing. Line: 1']), ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1,4', ['Error while parsing Snippet: ["Value for SnippetByteRange doesn\'t match ' - 'valid range pattern."]']), + 'valid range pattern. Line: 2"]']), ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1:4\nSnippetByteRange:10:23', ["Error while parsing Snippet: ['Multiple values for SnippetByteRange found. " "Line: 3']"]), From 23c558fac8d28fb09be118411f1f7ca109f52e34 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Fri, 3 Mar 2023 10:55:10 +0100 Subject: [PATCH 37/43] squashed review commits concerning test improvement [review] assert that only one relationship exists [review] improve tests for package parser [review] don't use assertCountEqual if list contains only one element [review] add newline to field [review] delete default values in datetime [fix] soften comparison of error messages to also support older python versions Signed-off-by: Meret Behrens --- .../parser/tagvalue/test_annotation_parser.py | 28 ++++++------ .../tagvalue/test_creation_info_parser.py | 38 ++++++++-------- .../parser/tagvalue/test_package_parser.py | 43 ++++++++++--------- .../tagvalue/test_relationship_parser.py | 1 + .../parser/tagvalue/test_snippet_parser.py | 18 ++++---- 5 files changed, 65 insertions(+), 63 deletions(-) diff --git a/tests/spdx/parser/tagvalue/test_annotation_parser.py b/tests/spdx/parser/tagvalue/test_annotation_parser.py index 7e99bffcd..7df26534b 100644 --- a/tests/spdx/parser/tagvalue/test_annotation_parser.py +++ b/tests/spdx/parser/tagvalue/test_annotation_parser.py @@ -8,7 +8,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re from datetime import datetime +from unittest import TestCase import pytest @@ -39,24 +41,24 @@ def test_parse_annotation(): @pytest.mark.parametrize("annotation_str, expected_message", [ - ('Annotator: Person: Jane Doe()', ['Error while constructing Annotation: Annotation.__init__() missing 4 ' - "required positional arguments: 'spdx_id', 'annotation_type', " - "'annotation_date', and 'annotation_comment'"]), + ('Annotator: Person: Jane Doe()', r"__init__() missing 4 " + "required positional arguments: 'spdx_id', 'annotation_type', " + "'annotation_date', and 'annotation_comment'"), ('Annotator: Person: Jane Doe()\nAnnotationType: SOURCE\nAnnotationDate: 201001-2912:23', - ["Error while parsing Annotation: ['Error while parsing AnnotationType: Token " - "did not match specified grammar rule. Line: 2', 'Error while parsing " - "AnnotationDate: Token did not match specified grammar rule. 
Line: 3']"]), + "Error while parsing Annotation: ['Error while parsing AnnotationType: Token " + "did not match specified grammar rule. Line: 2', 'Error while parsing " + "AnnotationDate: Token did not match specified grammar rule. Line: 3']"), ('Annotator: Jane Doe()\nAnnotationDate: 201001-29T18:30:22Z\n' 'AnnotationComment: Document level annotation\nAnnotationType: OTHER\nSPDXREF: SPDXRef-DOCUMENT', - ["Error while parsing Annotation: ['Error while parsing Annotator: Token did " - "not match specified grammar rule. Line: 1', 'Error while parsing " - "AnnotationDate: Token did not match specified grammar rule. Line: 2']"]), - ('Annotator: Person: ()', ["Error while parsing Annotation: [['No name for Person provided: Person: ().']]"]), - ('AnnotationType: REVIEW', ['Element Annotation is not the current element in scope, probably the ' - 'expected tag to start the element (Annotator) is missing. Line: 1'])]) + "Error while parsing Annotation: ['Error while parsing Annotator: Token did " + "not match specified grammar rule. Line: 1', 'Error while parsing " + "AnnotationDate: Token did not match specified grammar rule. Line: 2']"), + ('Annotator: Person: ()', "Error while parsing Annotation: [['No name for Person provided: Person: ().']]"), + ('AnnotationType: REVIEW', 'Element Annotation is not the current element in scope, probably the ' + 'expected tag to start the element (Annotator) is missing. Line: 1')]) def test_parse_invalid_annotation(annotation_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: parser.parse(annotation_str) - assert err.value.get_messages() == expected_message + assert expected_message in err.value.get_messages()[0] diff --git a/tests/spdx/parser/tagvalue/test_creation_info_parser.py b/tests/spdx/parser/tagvalue/test_creation_info_parser.py index f871e304c..f98f997dd 100644 --- a/tests/spdx/parser/tagvalue/test_creation_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_creation_info_parser.py @@ -31,7 +31,7 @@ 'Creator: Person: Bob (bob@example.com)', 'Creator: Organization: Acme.', 'Created: 2010-02-03T00:00:00Z', - 'CreatorComment: Sample Comment', + 'CreatorComment: Sample Comment \nwith multiple \nlines.', 'LicenseListVersion: 3.17' ]) @@ -51,14 +51,13 @@ def test_parse_creation_info(): TestCase().assertCountEqual(creation_info.creators, [Actor(ActorType.PERSON, "Bob", "bob@example.com"), Actor(ActorType.ORGANIZATION, "Acme.")]) - assert creation_info.creator_comment == 'Sample Comment' - assert creation_info.created == datetime(2010, 2, 3, 0, 0) + assert creation_info.creator_comment == 'Sample Comment \nwith multiple \nlines.' 
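# Standalone illustration (plain library behaviour, nothing from the spdx test suite)
# of two review changes in this test: datetime already defaults hour and minute to
# zero, so spelling them out adds nothing, and assertCountEqual only buys
# order-insensitivity, which a field holding a single element does not need.
from datetime import datetime
from unittest import TestCase

assert datetime(2010, 2, 3) == datetime(2010, 2, 3, 0, 0)

TestCase().assertCountEqual([2, 1], [1, 2])  # passes: same elements, order ignored
assert ["only-one"] == ["only-one"]          # plain == is just as strict for one element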
+ assert creation_info.created == datetime(2010, 2, 3) assert creation_info.license_list_version == Version(3, 17) - TestCase().assertCountEqual(creation_info.external_document_refs, - [ExternalDocumentRef("DocumentRef-spdx-tool-1.2", - "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", - Checksum(ChecksumAlgorithm.SHA1, - "d6a770ba38583ed4bb4525bd96e50461655d2759"))]) + assert creation_info.external_document_refs == [ExternalDocumentRef("DocumentRef-spdx-tool-1.2", + "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", + Checksum(ChecksumAlgorithm.SHA1, + "d6a770ba38583ed4bb4525bd96e50461655d2759"))] @pytest.mark.parametrize("document_str, expected_message", @@ -70,23 +69,22 @@ def test_parse_creation_info(): 'Creator: Person Bob (bob@example.com)', 'Creator: Organization: Acme [email]', 'Created: 2010-02-03T00:00:0Z', 'CreatorComment: Sample Comment', 'LicenseListVersion: 7']), - ["Error while parsing CreationInfo: " - "['Error while parsing DocumentNamespace: Token did not match specified grammar rule. " - "Line: 6', 'Error while parsing ExternalDocumentRef: " - "Token did not match specified grammar rule. Line: 7', 'Error while parsing Creator: " - "Token did not match specified grammar rule. Line: 8', 'Error while parsing Created: " - "Token did not match specified grammar rule. Line: 10', '7 is not a valid version string']"]), + "Error while parsing CreationInfo: " + "['Error while parsing DocumentNamespace: Token did not match specified grammar rule. " + "Line: 6', 'Error while parsing ExternalDocumentRef: " + "Token did not match specified grammar rule. Line: 7', 'Error while parsing Creator: " + "Token did not match specified grammar rule. Line: 8', 'Error while parsing Created: " + "Token did not match specified grammar rule. Line: 10', '7 is not a valid version string']"), ('\n'.join( ['SPDXVersion: SPDX-2.3', 'DataLicense: CC0-1.0', 'DocumentName: Sample_Document-V2.3', 'SPDXID: SPDXRef-DOCUMENT']), - ['Error while constructing CreationInfo: CreationInfo.__init__() missing 3 ' - "required positional arguments: 'document_namespace', 'creators', and " - "'created'"]), + r"__init__() missing 3 required positional arguments: " + r"'document_namespace', 'creators', and 'created'"), ('LicenseListVersion: 3.5\nLicenseListVersion: 3.7', - ["Error while parsing CreationInfo: ['Multiple values for LicenseListVersion " - "found. Line: 2']"])])) + "Error while parsing CreationInfo: ['Multiple values for LicenseListVersion " + "found. 
Line: 2']")])) def test_parse_invalid_creation_info(document_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: parser.parse(document_str) - assert err.value.get_messages() == expected_message + assert expected_message in err.value.get_messages()[0] diff --git a/tests/spdx/parser/tagvalue/test_package_parser.py b/tests/spdx/parser/tagvalue/test_package_parser.py index 0ecf9a5e5..2e83f75ad 100644 --- a/tests/spdx/parser/tagvalue/test_package_parser.py +++ b/tests/spdx/parser/tagvalue/test_package_parser.py @@ -25,7 +25,7 @@ def test_parse_package(): package_str = '\n'.join([ 'PackageName: Test', 'SPDXID: SPDXRef-Package', - 'PackageVersion: Version 0.9.2', + 'PackageVersion: 1:22.36.1-8+deb11u1', 'PackageDownloadLocation: http://example.com/test', 'FilesAnalyzed: True', 'PackageSummary: Test package', @@ -56,8 +56,10 @@ def test_parse_package(): package = document.packages[0] assert package.name == 'Test' assert package.spdx_id == 'SPDXRef-Package' - assert package.version == 'Version 0.9.2' + assert package.version == '1:22.36.1-8+deb11u1' assert len(package.license_info_from_files) == 2 + TestCase().assertCountEqual(package.license_info_from_files, [get_spdx_licensing().parse("Apache-1.0"), + get_spdx_licensing().parse("Apache-2.0")]) assert package.license_concluded == get_spdx_licensing().parse('LicenseRef-2.0 AND Apache-2.0') assert package.files_analyzed is True assert package.comment == 'Comment on the package.' @@ -69,36 +71,35 @@ def test_parse_package(): ExternalPackageRef(ExternalPackageRefCategory.OTHER, "LocationRef-acmeforge", "acmecorp/acmenator/4.1.3-alpha")]) assert package.primary_package_purpose == PackagePurpose.OPERATING_SYSTEM - assert package.built_date == datetime(2020, 1, 1, 12, 0, 0) - assert package.release_date == datetime(2021, 1, 1, 12, 0, 0) - assert package.valid_until_date == datetime(2022, 1, 1, 12, 0, 0) + assert package.built_date == datetime(2020, 1, 1, 12) + assert package.release_date == datetime(2021, 1, 1, 12) + assert package.valid_until_date == datetime(2022, 1, 1, 12) @pytest.mark.parametrize("package_str, expected_message", [('PackageDownloadLocation: SPDXRef-Package', - ['Element Package is not the current element in scope, probably the expected ' - 'tag to start the element (PackageName) is missing. Line: 1']), + 'Element Package is not the current element in scope, probably the expected ' + 'tag to start the element (PackageName) is missing. Line: 1'), ('PackageName: TestPackage', - ['Error while constructing Package: Package.__init__() missing 2 required ' - "positional arguments: 'spdx_id' and 'download_location'"]), + r"__init__() missing 2 required positional arguments: 'spdx_id' and 'download_location'"), ('PackageName: TestPackage\nPackageCopyrightText:This is a copyright\n' 'PackageCopyrightText:MultipleCopyright', - ["Error while parsing Package: ['Multiple values for PackageCopyrightText " - "found. Line: 3']"]), + "Error while parsing Package: ['Multiple values for PackageCopyrightText " + "found. Line: 3']"), ('PackageName: TestPackage\nExternalRef: reference locator', - ['Error while parsing Package: ["Couldn\'t split PackageExternalRef in ' - 'category, reference_type and locator. Line: 2"]']), + 'Error while parsing Package: ["Couldn\'t split PackageExternalRef in ' + 'category, reference_type and locator. 
Line: 2"]'), ('PackageName: TestPackage\nExternalRef: category reference locator', - ["Error while parsing Package: ['Invalid ExternalPackageRefCategory: " - "category']"]), + "Error while parsing Package: ['Invalid ExternalPackageRefCategory: " + "category']"), ('SPDXID:SPDXRef-DOCUMENT\nPackageName: TestPackage\nSPDXID:SPDXRef-Package\n' 'PackageDownloadLocation: download.com\nPackageVerificationCode: category reference locator', - ["Error while parsing Package: ['Error while parsing PackageVerificationCode: " - "Value did not match expected format. Line: 5']"]), + "Error while parsing Package: ['Error while parsing PackageVerificationCode: " + "Value did not match expected format. Line: 5']"), ('PackageName: TestPackage\nBuiltDate: 2012\nValidUntilDate:202-11-02T00:00', - ["Error while parsing Package: ['Error while parsing BuiltDate: Token did not " - "match specified grammar rule. Line: 2', 'Error while parsing " - "ValidUntilDate: Token did not match specified grammar rule. Line: 3']"]) + "Error while parsing Package: ['Error while parsing BuiltDate: Token did not " + "match specified grammar rule. Line: 2', 'Error while parsing " + "ValidUntilDate: Token did not match specified grammar rule. Line: 3']") ]) def test_parse_invalid_package(package_str, expected_message): parser = Parser() @@ -106,4 +107,4 @@ def test_parse_invalid_package(package_str, expected_message): with pytest.raises(SPDXParsingError) as err: parser.parse(package_str) - assert err.value.get_messages() == expected_message + assert expected_message in err.value.get_messages()[0] diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py index dc93c1dc4..bf8b821fa 100644 --- a/tests/spdx/parser/tagvalue/test_relationship_parser.py +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -36,6 +36,7 @@ def test_parse_relationship(relationship_str, expected_relationship): parser = Parser() document = parser.parse("\n".join([DOCUMENT_STR, relationship_str])) assert document is not None + assert len(document.relationships) == 1 relationship = document.relationships[0] assert relationship == expected_relationship diff --git a/tests/spdx/parser/tagvalue/test_snippet_parser.py b/tests/spdx/parser/tagvalue/test_snippet_parser.py index 0dd981fe3..7e76bd815 100644 --- a/tests/spdx/parser/tagvalue/test_snippet_parser.py +++ b/tests/spdx/parser/tagvalue/test_snippet_parser.py @@ -57,16 +57,16 @@ def test_parse_snippet(): @pytest.mark.parametrize("snippet_str, expected_message", [ - ('SnippetName: TestSnippet', ['Element Snippet is not the current element in scope, probably the expected ' - 'tag to start the element (SnippetSPDXID) is missing. Line: 1']), + ('SnippetName: TestSnippet', 'Element Snippet is not the current element in scope, probably the expected ' + 'tag to start the element (SnippetSPDXID) is missing. Line: 1'), ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1,4', - ['Error while parsing Snippet: ["Value for SnippetByteRange doesn\'t match ' - 'valid range pattern. Line: 2"]']), + 'Error while parsing Snippet: ["Value for SnippetByteRange doesn\'t match ' + 'valid range pattern. Line: 2"]'), ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1:4\nSnippetByteRange:10:23', - ["Error while parsing Snippet: ['Multiple values for SnippetByteRange found. 
" - "Line: 3']"]), - ('SnippetSPDXID: SPDXRef-Snippet', ['Error while constructing Snippet: Snippet.__init__() missing 2 required ' - "positional arguments: 'file_spdx_id' and 'byte_range'"]) + "Error while parsing Snippet: ['Multiple values for SnippetByteRange found. " + "Line: 3']"), + ('SnippetSPDXID: SPDXRef-Snippet', r"__init__() missing 2 required " + r"positional arguments: 'file_spdx_id' and 'byte_range'") ]) def test_parse_invalid_snippet(snippet_str, expected_message): parser = Parser() @@ -74,4 +74,4 @@ def test_parse_invalid_snippet(snippet_str, expected_message): with pytest.raises(SPDXParsingError) as err: parser.parse(snippet_str) - assert err.value.get_messages() == expected_message + assert expected_message in err.value.get_messages()[0] From 1a1ca113b3b5ad8255f3f2950c015f9ed058e53a Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 8 Mar 2023 08:55:07 +0100 Subject: [PATCH 38/43] [review] fix type hint, parse_checksum and add test for the latter Signed-off-by: Meret Behrens --- .../parser/tagvalue/parser/helper_methods.py | 27 +++---- src/spdx/parser/tagvalue/parser/tagvalue.py | 6 +- .../parser/tagvalue/test_helper_methods.py | 73 +++++++++++++++++++ 3 files changed, 88 insertions(+), 18 deletions(-) create mode 100644 tests/spdx/parser/tagvalue/test_helper_methods.py diff --git a/src/spdx/parser/tagvalue/parser/helper_methods.py b/src/spdx/parser/tagvalue/parser/helper_methods.py index 869aa21e5..857667ef7 100644 --- a/src/spdx/parser/tagvalue/parser/helper_methods.py +++ b/src/spdx/parser/tagvalue/parser/helper_methods.py @@ -11,10 +11,10 @@ import re from typing import Optional, Callable, Any, Dict +from ply.yacc import YaccProduction + from spdx.model.checksum import Checksum, ChecksumAlgorithm from spdx.parser.error import SPDXParsingError -from spdx.parser.logger import Logger -from spdx.parser.parsing_functions import construct_or_raise_parsing_error def grammar_rule(doc): @@ -37,25 +37,22 @@ def str_from_text(text: Optional[str]) -> Optional[str]: return None -def parse_checksum(logger: Logger, checksum_str: str, line_number: int) -> Optional[Checksum]: - try: - algorithm, value = checksum_str.split(":") - except ValueError: - logger.append( - f"Couldn't split value for checksum in algorithm and value. Line: {line_number}") - return None +def parse_checksum(checksum_str: str) -> Checksum: + # The lexer and the corresponding regex for the token CHECKSUM and EXT_DOC_REF_CHECKSUM ensure that the passed + # checksum_str is formatted in the way that the following lines of code can't cause an error. + algorithm, value = checksum_str.split(":") algorithm = ChecksumAlgorithm[algorithm.upper().replace("-", "_")] value = value.strip() - try: - checksum = construct_or_raise_parsing_error(Checksum, {"algorithm": algorithm, "value": value}) - except SPDXParsingError as err: - logger.append(err.get_messages()) - checksum = None + checksum = Checksum(algorithm, value) return checksum -def set_value(parsed_value: Any, dict_to_fill: Dict[str, Any], argument_name: Optional[str] = None, +def set_value(parsed_value: YaccProduction, dict_to_fill: Dict[str, Any], argument_name: Optional[str] = None, method_to_apply: Callable = lambda x: x): + # Parsed_value.slice returns a List of the objects in the corresponding grammar_rule for the parsed value, + # e.g. for @grammar_rule("created : CREATED DATE") the return value is something like + # p.slice = ["created", LexToken(CREATED,..), LexToken(DATE,..)]. 
+ # So the first value is the name of the grammar_rule that we have named according to the field in the data model. if not argument_name: argument_name = str(parsed_value.slice[0]) if argument_name in dict_to_fill: diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 1c9cb4512..2cd4c2d5f 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -164,7 +164,7 @@ def p_doc_name(self, p): def p_external_document_ref(self, p): document_ref_id = p[2] document_uri = p[3] - checksum = parse_checksum(self.creation_info["logger"], p[4], p.lineno(1)) + checksum = parse_checksum(p[4]) external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum) self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) @@ -278,7 +278,7 @@ def p_file_type_value(self, p): @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") def p_file_checksum(self, p): self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) - checksum = parse_checksum(self.current_element["logger"], p[2], p.lineno(1)) + checksum = parse_checksum(p[2]) self.current_element.setdefault("checksums", []).append(checksum) @grammar_rule("file_conc : FILE_LICS_CONC license_or_no_assertion_or_none") @@ -391,7 +391,7 @@ def p_pkg_license_concluded(self, p): @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") def p_pkg_checksum(self, p): self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - checksum = parse_checksum(self.current_element["logger"], p[2], p.lineno(1)) + checksum = parse_checksum(p[2]) self.current_element.setdefault("checksums", []).append(checksum) @grammar_rule("verification_code : PKG_VERF_CODE LINE") diff --git a/tests/spdx/parser/tagvalue/test_helper_methods.py b/tests/spdx/parser/tagvalue/test_helper_methods.py new file mode 100644 index 000000000..d38952502 --- /dev/null +++ b/tests/spdx/parser/tagvalue/test_helper_methods.py @@ -0,0 +1,73 @@ +# Copyright (c) 2023 spdx contributors +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
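# A short sketch of the normalisation that the simplified parse_checksum above relies
# on: the lexer token already guarantees the "<algorithm>: <hex>" shape, so splitting
# at the colon and mapping e.g. "BLAKE2b-256" to "BLAKE2B_256" is enough to index the
# algorithm enum. The enum below is a hypothetical stand-in for
# spdx.model.checksum.ChecksumAlgorithm, kept tiny to stay self-contained.
from enum import Enum, auto


class ChecksumAlgorithmSketch(Enum):
    SHA1 = auto()
    SHA3_256 = auto()
    BLAKE2B_256 = auto()


def normalize(checksum_str: str):
    # mirrors parse_checksum: split at the colon, then normalise the algorithm name
    algorithm, value = checksum_str.split(":")
    return ChecksumAlgorithmSketch[algorithm.upper().replace("-", "_")], value.strip()


assert normalize("BLAKE2b-256: a0eb3d")[0] is ChecksumAlgorithmSketch.BLAKE2B_256
assert normalize("SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759")[1].startswith("d6a770")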
+import pytest + +from spdx.model.checksum import ChecksumAlgorithm +from spdx.parser.tagvalue.parser.helper_methods import parse_checksum + + +@pytest.mark.parametrize("checksum_str, algorithm, value", + [("SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + ChecksumAlgorithm.SHA1, "d6a770ba38583ed4bb4525bd96e50461655d2759"), + ("SHA224: 9c9f4e27d957a123cc32d86afe33ae53b1184192cccb23b0f257f588", + ChecksumAlgorithm.SHA224, + "9c9f4e27d957a123cc32d86afe33ae53b1184192cccb23b0f257f588"), + ("SHA256: fbea580d286bbbbb41314430d58ba887716a74d7134119c5307cdc9f0c7a4299", + ChecksumAlgorithm.SHA256, + "fbea580d286bbbbb41314430d58ba887716a74d7134119c5307cdc9f0c7a4299"), + ( + "SHA384: 73b4ad9a34e5f76cb2525ea6bb8b1dcf9ba79426b3295bd18bc6d148cba4fcc2ca3cf2630fd481b47caaac9127103933", + ChecksumAlgorithm.SHA384, + "73b4ad9a34e5f76cb2525ea6bb8b1dcf9ba79426b3295bd18bc6d148cba4fcc2ca3cf2630fd481b47caaac9127103933"), + ( + "SHA512: c2aa8a5d297f5e888ce9a30d3745ccc5a628533449a9f98524de3d23695a268f394a67faf8ef370727c2946f1dbbec34aeb7ac10f15af43e7cb5547f1a464053", + ChecksumAlgorithm.SHA512, + "c2aa8a5d297f5e888ce9a30d3745ccc5a628533449a9f98524de3d23695a268f394a67faf8ef370727c2946f1dbbec34aeb7ac10f15af43e7cb5547f1a464053"), + ("SHA3-256: 1e772489c042f49aeaae32b00fc5ef170a25afa741cffaafadde597d4d1727ce", + ChecksumAlgorithm.SHA3_256, + "1e772489c042f49aeaae32b00fc5ef170a25afa741cffaafadde597d4d1727ce"), ( + "SHA3-384: dd9e30747551865b483bd76bd967384dce0e5670d1b1c3f701cffac7f49b1c46791253493835136b3aa5f679e364c166", + ChecksumAlgorithm.SHA3_384, + "dd9e30747551865b483bd76bd967384dce0e5670d1b1c3f701cffac7f49b1c46791253493835136b3aa5f679e364c166"), + ( + "SHA3-512: 906bca5580be8c95ae44f775363fb69968ad568898dfb03e0ff96cd9445a0b75f817b68e5c1e80ad624031f851cfddd3a101e1d111310266a5d46e2bc1ffbb36", + ChecksumAlgorithm.SHA3_512, + "906bca5580be8c95ae44f775363fb69968ad568898dfb03e0ff96cd9445a0b75f817b68e5c1e80ad624031f851cfddd3a101e1d111310266a5d46e2bc1ffbb36"), + ("BLAKE2b-256: a0eb3ddfa5807780a562b9c313b2537f1e8dc621e9a524f8c1ffcf07a79e35c7", + ChecksumAlgorithm.BLAKE2B_256, + "a0eb3ddfa5807780a562b9c313b2537f1e8dc621e9a524f8c1ffcf07a79e35c7"), ( + "BLAKE2B-384: 902511afc8939c0193d87857f45a19eddfd7e0413b0f8701a3baaf1b025f882b45a8fbf623fa0ad79b64850ac7a4d0b2", + ChecksumAlgorithm.BLAKE2B_384, + "902511afc8939c0193d87857f45a19eddfd7e0413b0f8701a3baaf1b025f882b45a8fbf623fa0ad79b64850ac7a4d0b2"), + ( + "BLAKE2B-512: 72c23b0160e1af3cb159f0cc96210c5e9aecc5a65d4618566776fa6117bf84929dcef56c7f8b087691c23000c945470842d90b5e8c4af74dce531ca8ebd8824c", + ChecksumAlgorithm.BLAKE2B_512, + "72c23b0160e1af3cb159f0cc96210c5e9aecc5a65d4618566776fa6117bf84929dcef56c7f8b087691c23000c945470842d90b5e8c4af74dce531ca8ebd8824c"), + ( + "BLAKE3: a872cac2efd29ed2ad8b5faa79b63f983341bea41183582b8863d952f6ac3e1cdfe0189967a13006857d3b9985174bf67239874dcec4cbbc9839496179feafeda872cac2efd29ed2ad8b5faa79b63f983341bea41183582b8863d952f6ac3e1cdfe0189967a13006857d3b9985174bf67239874dcec4cbbc9839496179feafed", + ChecksumAlgorithm.BLAKE3, + "a872cac2efd29ed2ad8b5faa79b63f983341bea41183582b8863d952f6ac3e1cdfe0189967a13006857d3b9985174bf67239874dcec4cbbc9839496179feafeda872cac2efd29ed2ad8b5faa79b63f983341bea41183582b8863d952f6ac3e1cdfe0189967a13006857d3b9985174bf67239874dcec4cbbc9839496179feafed"), + ("MD2: af1eec2a1b18886c3f3cc244349d91d8", ChecksumAlgorithm.MD2, + "af1eec2a1b18886c3f3cc244349d91d8"), + ("MD4: d4c41ce30a517d6ce9d79c8c17bb4b66", ChecksumAlgorithm.MD4, + "d4c41ce30a517d6ce9d79c8c17bb4b66"), + ("MD5: 0d7f61beb7018b3924c6b8f96549fa39", 
ChecksumAlgorithm.MD5, + "0d7f61beb7018b3924c6b8f96549fa39"), + ( + "MD6: af1eec2a1b18886c3f3cc244349d91d8d4c41ce30a517d6ce9d79c8c17bb4b660d7f61beb7018b3924c6b8f96549fa39", + ChecksumAlgorithm.MD6, + "af1eec2a1b18886c3f3cc244349d91d8d4c41ce30a517d6ce9d79c8c17bb4b660d7f61beb7018b3924c6b8f96549fa39"), + ("ADLER32: 02ec0130", ChecksumAlgorithm.ADLER32, "02ec0130")]) +def test_parse_checksum(checksum_str, algorithm, value): + checksum = parse_checksum(checksum_str) + + assert checksum.algorithm == algorithm + assert checksum.value == value From 2c7771665aff1e14b32a4bd1801a67442394c5ea Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 8 Mar 2023 08:59:25 +0100 Subject: [PATCH 39/43] squashed review commits concerning the tag value parser [review] add comments to parser to improve code readability [review] merge parsing methods for byte_range and line_range [review] delete superfluous except block [review] delete superfluous call to setdefault [review] delete superfluous case distinction [review] rename parameter [review] get rid of docstrings [review] rename Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/parser/tagvalue.py | 99 ++++++++++----------- 1 file changed, 45 insertions(+), 54 deletions(-) diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 2cd4c2d5f..9376d76cc 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -48,7 +48,7 @@ class Parser(object): logger: Logger current_element: Dict[str, Any] creation_info: Dict[str, Any] - elements_build: Dict[str, Any] + elements_built: Dict[str, Any] lex: SPDXLexer yacc: LRParser @@ -57,7 +57,7 @@ def __init__(self, **kwargs): self.logger = Logger() self.current_element = {"logger": Logger()} self.creation_info = {"logger": Logger()} - self.elements_build = dict() + self.elements_built = dict() self.lex = SPDXLexer() self.lex.build(reflags=re.UNICODE) self.yacc = yacc.yacc(module=self, **kwargs) @@ -168,8 +168,8 @@ def p_external_document_ref(self, p): external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum) self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) + @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORG_VALUE") def p_creator(self, p): - """creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORG_VALUE""" self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2])) @grammar_rule("created : CREATED DATE") @@ -513,36 +513,26 @@ def p_snippet_license_info(self, p): else: self.current_element.setdefault("license_info_in_snippet", []).append(p[2]) - @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE") - def p_snippet_byte_range(self, p): - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) - if "byte_range" in self.current_element: - self.current_element["logger"].append( - f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") - range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) - if not range_re.match(p[2].strip()): - self.current_element["logger"].append(f"Value for SnippetByteRange doesn't match valid range pattern. 
" - f"Line: {p.lineno(1)}") + @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE\n snip_line_range : SNIPPET_LINE_RANGE LINE") + def p_snippet_range(self, p): + if p[1] == "SnippetByteRange": + argument_name = "byte_range" + elif p[1] == "SnippetLineRange": + argument_name = "line_range" + else: return - startpoint = int(p[2].split(":")[0]) - endpoint = int(p[2].split(":")[-1]) - self.current_element["byte_range"] = startpoint, endpoint - - @grammar_rule("snip_line_range : SNIPPET_LINE_RANGE LINE") - def p_snippet_line_range(self, p): self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) - if "line_range" in self.current_element: + if argument_name in self.current_element: self.current_element["logger"].append( f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") - return range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) if not range_re.match(p[2].strip()): - self.current_element["logger"].append(f"Value for SnippetLineRange doesn't match valid range pattern. " + self.current_element["logger"].append(f"Value for {p[1]} doesn't match valid range pattern. " f"Line: {p.lineno(1)}") return startpoint = int(p[2].split(":")[0]) - endpoint = int(p[2].split(":")[1]) - self.current_element["line_range"] = startpoint, endpoint + endpoint = int(p[2].split(":")[-1]) + self.current_element[argument_name] = startpoint, endpoint # parsing methods for annotation @@ -552,8 +542,8 @@ def p_annotation_value_error(self, p): self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") + @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE") def p_annotator(self, p): - """annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE""" self.initialize_new_current_element(Annotation) set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor) @@ -632,29 +622,32 @@ def p_error(self, p): pass def parse(self, text): + # entry point for the tag-value parser self.yacc.parse(text, lexer=self.lex) + # this constructs the last remaining element; all other elements are constructed at the start of + # their subsequent element self.construct_current_element() - try: - raise_parsing_error_if_logger_has_messages(self.creation_info.pop("logger"), "CreationInfo") - except SPDXParsingError as err: - self.logger.extend(err.get_messages()) + + # To be able to parse creation info values if they appear in between other elements, e.g. packages, we use + # two different dictionaries to collect the creation info and all other elements. Therefore, we have a separate + # logger for the creation info whose messages we need to add to the main logger to than raise all collected + # messages at once. 
+ creation_info_logger = self.creation_info.pop("logger") + if creation_info_logger.has_messages(): + self.logger.extend([f"Error while parsing CreationInfo: {creation_info_logger.get_messages()}"]) + raise_parsing_error_if_logger_has_messages(self.logger) creation_info = construct_or_raise_parsing_error(CreationInfo, self.creation_info) - self.elements_build["creation_info"] = creation_info - document = construct_or_raise_parsing_error(Document, self.elements_build) + self.elements_built["creation_info"] = creation_info + document = construct_or_raise_parsing_error(Document, self.elements_built) return document - def initialize_new_current_element(self, class_name: Any): + def initialize_new_current_element(self, clazz: Any): self.construct_current_element() - self.current_element["class"] = class_name + self.current_element["class"] = clazz def check_that_current_element_matches_class_for_value(self, expected_class, line_number): - if "class" not in self.current_element: - self.logger.append( - f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " - f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. " - f"Line: {line_number}") - elif expected_class != self.current_element["class"]: + if "class" not in self.current_element or expected_class != self.current_element["class"]: self.logger.append( f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. " @@ -662,19 +655,17 @@ def check_that_current_element_matches_class_for_value(self, expected_class, lin def construct_current_element(self): if "class" not in self.current_element: - self.current_element = {"logger": Logger()} - return - class_name = self.current_element.pop("class") - try: - raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), class_name.__name__) - except SPDXParsingError as err: - self.logger.extend(err.get_messages()) - self.current_element = {"logger": Logger()} + # When the first element of the document is instantiated we don't have a current element in scope + # and the key "class" doesn't exist. Additionally, if the first element doesn't have the expected start + # value the key "class" wouldn't exist. To prevent a KeyError we use early return. return + + clazz = self.current_element.pop("class") try: - self.elements_build.setdefault(CLASS_MAPPING[class_name.__name__], []).append( - construct_or_raise_parsing_error(class_name, self.current_element)) - if class_name == File: + raise_parsing_error_if_logger_has_messages(self.current_element.pop("logger"), clazz.__name__) + self.elements_built.setdefault(CLASS_MAPPING[clazz.__name__], []).append( + construct_or_raise_parsing_error(clazz, self.current_element)) + if clazz == File: self.check_for_preceding_package_and_build_contains_relationship() except SPDXParsingError as err: self.logger.extend(err.get_messages()) @@ -682,13 +673,13 @@ def construct_current_element(self): def check_for_preceding_package_and_build_contains_relationship(self): file_spdx_id = self.current_element["spdx_id"] - if "packages" not in self.elements_build: + if "packages" not in self.elements_built: return # We assume that all files that are not contained in a package precede any package information. Any file # information that follows any package information is assigned to the last parsed package by creating a # corresponding contains relationship. 
# (see https://spdx.github.io/spdx-spec/v2.3/composition-of-an-SPDX-document/#5.2.2) - package_spdx_id = self.elements_build["packages"][-1].spdx_id + package_spdx_id = self.elements_built["packages"][-1].spdx_id relationship = Relationship(package_spdx_id, RelationshipType.CONTAINS, file_spdx_id) - if relationship not in self.elements_build.setdefault("relationships", []): - self.elements_build.setdefault("relationships", []).append(relationship) + if relationship not in self.elements_built.setdefault("relationships", []): + self.elements_built["relationships"].append(relationship) From 451200d1ff873dcfb6d6ade8e98f506aed000180 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 8 Mar 2023 12:11:22 +0100 Subject: [PATCH 40/43] squashed review commits concerning structure of the tag value parser [review] use strings instead of p.slice [review] merge generic parsing methods [review] parse value only if the current_element matches [review] merge parsing methods [review] merge error methods for current elements [review] delete tokens for enum values and let the parser take care of correct values Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/lexer/tagvalue.py | 22 +- .../parser/tagvalue/parser/helper_methods.py | 54 ++- src/spdx/parser/tagvalue/parser/tagvalue.py | 398 ++++++------------ .../parser/tagvalue/test_annotation_parser.py | 6 +- .../spdx/parser/tagvalue/test_file_parser.py | 5 +- .../parser/tagvalue/test_package_parser.py | 2 +- .../tagvalue/test_relationship_parser.py | 6 +- .../parser/tagvalue/test_tag_value_lexer.py | 8 +- .../parser/tagvalue/test_tag_value_parser.py | 16 +- 9 files changed, 200 insertions(+), 317 deletions(-) diff --git a/src/spdx/parser/tagvalue/lexer/tagvalue.py b/src/spdx/parser/tagvalue/lexer/tagvalue.py index e6737db47..3e4bb3569 100644 --- a/src/spdx/parser/tagvalue/lexer/tagvalue.py +++ b/src/spdx/parser/tagvalue/lexer/tagvalue.py @@ -97,27 +97,7 @@ class SPDXLexer(object): "SnippetLineRange": "SNIPPET_LINE_RANGE", # Common fields "NOASSERTION": "NO_ASSERTION", - "NONE": "NONE", - "SOURCE": "SOURCE", - "BINARY": "BINARY", - "ARCHIVE": "ARCHIVE", - "APPLICATION": "APPLICATION", - "AUDIO": "AUDIO", - "IMAGE": "IMAGE", - "TEXT": "FILETYPE_TEXT", - "VIDEO": "VIDEO", - "DOCUMENTATION": "DOCUMENTATION", - "SPDX": "SPDX", - "OTHER": "OTHER", - "REVIEW": "REVIEW", - "FRAMEWORK": "FRAMEWORK", - "LIBRARY": "LIBRARY", - "CONTAINER": "CONTAINER", - "OPERATING-SYSTEM": "OPERATING_SYSTEM", - "DEVICE": "DEVICE", - "FIRMWARE": "FIRMWARE", - "FILE": "FILE", - "INSTALL": "INSTALL" + "NONE": "NONE" } states = (("text", "exclusive"),) diff --git a/src/spdx/parser/tagvalue/parser/helper_methods.py b/src/spdx/parser/tagvalue/parser/helper_methods.py index 857667ef7..f13204b78 100644 --- a/src/spdx/parser/tagvalue/parser/helper_methods.py +++ b/src/spdx/parser/tagvalue/parser/helper_methods.py @@ -13,7 +13,14 @@ from ply.yacc import YaccProduction +from spdx.casing_tools import camel_case_to_snake_case +from spdx.model.annotation import Annotation from spdx.model.checksum import Checksum, ChecksumAlgorithm +from spdx.model.document import CreationInfo +from spdx.model.extracted_licensing_info import ExtractedLicensingInfo +from spdx.model.file import File +from spdx.model.package import Package +from spdx.model.snippet import Snippet from spdx.parser.error import SPDXParsingError @@ -49,12 +56,8 @@ def parse_checksum(checksum_str: str) -> Checksum: def set_value(parsed_value: YaccProduction, dict_to_fill: Dict[str, Any], argument_name: Optional[str] = None, 
method_to_apply: Callable = lambda x: x): - # Parsed_value.slice returns a List of the objects in the corresponding grammar_rule for the parsed value, - # e.g. for @grammar_rule("created : CREATED DATE") the return value is something like - # p.slice = ["created", LexToken(CREATED,..), LexToken(DATE,..)]. - # So the first value is the name of the grammar_rule that we have named according to the field in the data model. if not argument_name: - argument_name = str(parsed_value.slice[0]) + argument_name = get_property(parsed_value[1]) if argument_name in dict_to_fill: dict_to_fill["logger"].append( f"Multiple values for {parsed_value[1]} found. Line: {parsed_value.lineno(1)}") @@ -65,3 +68,44 @@ def set_value(parsed_value: YaccProduction, dict_to_fill: Dict[str, Any], argume dict_to_fill["logger"].append(err.get_messages()) except ValueError as err: dict_to_fill["logger"].append(err.args[0]) + except KeyError: + dict_to_fill["logger"].append(f"Invalid {parsed_value[1]}: {parsed_value[2]}. Line: {parsed_value.lineno(1)}") + + +def get_property(tag: str): + if tag not in TAG_DATA_MODEL_FIELD.keys(): + return camel_case_to_snake_case(tag) + return TAG_DATA_MODEL_FIELD[tag][1] + + +# This dictionary serves as a mapping from a tag to the corresponding class and field in the internal data model. +# This mapping is not complete as we only list the values which can be parsed by a generic method and don't need any +# individual logic. +TAG_DATA_MODEL_FIELD = { + "SPDXVersion": (CreationInfo, "spdx_version"), "DataLicense": (CreationInfo, "data_license"), + "DocumentName": (CreationInfo, "name"), "DocumentComment": (CreationInfo, "document_comment"), + "DocumentNamespace": (CreationInfo, "document_namespace"), "Creator": (CreationInfo, "creator"), + "Created": (CreationInfo, "created"), "CreatorComment": (CreationInfo, "creator_comment"), + "LicenseListVersion": (CreationInfo, "license_list_version"), + "ExternalDocumentRef": (CreationInfo, "external_document_refs"), + "FileName": (File, "name"), "FileType": (File, "file_type"), "FileChecksum": (File, "checksums"), + "FileNotice": (File, "notice"), "FileCopyrightText": (File, "copyright_text"), + "LicenseComments": (File, "license_comment"), "FileComment": (File, "comment"), + "LicenseConcluded": (File, "license_concluded"), "LicenseDeclared": (File, "license_declared"), + "PackageName": (Package, "name"), "PackageComment": (Package, "comment"), + "PackageCopyrightText": (Package, "copyright_text"), "PackageLicenseComments": (Package, "license_comment"), + "PackageLicenseDeclared": (Package, "license_declared"), "PackageLicenseConcluded": (Package, "license_concluded"), + "PackageFileName": (Package, "file_name"), "PackageVersion": (Package, "version"), + "PackageDownloadLocation": (Package, "download_location"), "PackageSummary": (Package, "summary"), + "PackageSourceInfo": (Package, "source_info"), "PackageSupplier": (Package, "supplier"), + "PackageOriginator": (Package, "originator"), "PackageDescription": (Package, "description"), + "PackageHomePage": (Package, "homepage"), + "SnippetSPDXID": (Snippet, "spdx_id"), "SnippetFromFileSPDXID": (Snippet, "file_spdx_id"), + "SnippetName": (Snippet, "name"), + "SnippetComment": (Snippet, "comment"), "SnippetCopyrightText": (Snippet, "copyright_text"), + "SnippetLicenseComments": (Snippet, "license_comment"), "SnippetLicenseConcluded": (Snippet, "license_concluded"), + "SnippetByteRange": (Snippet, "byte_range"), "SnippetLineRange": (Snippet, "line_range"), + "SPDXREF": (Annotation, "spdx_id"), 
"AnnotationComment": (Annotation, "annotation_comment"), + "LicenseID": (ExtractedLicensingInfo, "license_id"), "ExtractedText": (ExtractedLicensingInfo, "extracted_text"), + "LicenseComment": (ExtractedLicensingInfo, "comment"), "LicenseName": (ExtractedLicensingInfo, "license_name") +} diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser/tagvalue.py index 9376d76cc..9ae442f3e 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser/tagvalue.py @@ -35,12 +35,16 @@ from spdx.parser.logger import Logger from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer -from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text, parse_checksum, set_value +from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text, parse_checksum, set_value, \ + TAG_DATA_MODEL_FIELD CLASS_MAPPING = dict(File="files", Annotation="annotations", Relationship="relationships", Snippet="snippets", Package="packages", ExtractedLicensingInfo="extracted_licensing_info") ELEMENT_EXPECTED_START_TAG = dict(File="FileName", Annotation="Annotator", Relationship="Relationship", Snippet="SnippetSPDXID", Package="PackageName", ExtractedLicensingInfo="LicenseID") +EXPECTED_START_TAG_ELEMENT = {"FileName": File, "PackageName": Package, "Annotator": Annotation, + "Relationship": Relationship, "SnippetSPDXID": Snippet, + "LicenseID": ExtractedLicensingInfo} class Parser(object): @@ -97,6 +101,68 @@ def p_attrib(self, p): pass # general parsing methods + @grammar_rule("license_id : LICS_ID error\n lic_xref : LICS_CRS_REF error\n lic_comment : LICS_COMMENT error\n " + "license_name : LICS_NAME error\n extracted_text : LICS_TEXT error\n " + "file_name : FILE_NAME error\n file_contrib : FILE_CONTRIB error\n file_notice : FILE_NOTICE error\n " + "file_cr_text : FILE_CR_TEXT error\n file_lics_comment : FILE_LICS_COMMENT error\n " + "file_attribution_text : FILE_ATTRIBUTION_TEXT error\n file_lics_info : FILE_LICS_INFO error\n " + "file_comment : FILE_COMMENT error\n file_checksum : FILE_CHECKSUM error\n " + "file_conc : FILE_LICS_CONC error\n file_type : FILE_TYPE error\n " + "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n " + "description : PKG_DESC error\n pkg_comment : PKG_COMMENT error\n summary : PKG_SUM error\n " + "pkg_cr_text : PKG_CPY_TEXT error\n pkg_ext_ref : PKG_EXT_REF error\n " + "pkg_lic_comment : PKG_LICS_COMMENT error\n pkg_lic_decl : PKG_LICS_DECL error\n " + "pkg_lic_ff : PKG_LICS_FFILE error \n pkg_lic_conc : PKG_LICS_CONC error\n " + "source_info : PKG_SRC_INFO error\n homepage : PKG_HOME error\n pkg_checksum : PKG_CHECKSUM error\n " + "verification_code : PKG_VERF_CODE error\n download_location : PKG_DOWN error\n " + "files_analyzed : PKG_FILES_ANALYZED error\n originator : PKG_ORIG error\n " + "supplier : PKG_SUPPL error\n pkg_file_name : PKG_FILE_NAME error\n " + "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n " + "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n " + "valid_until_date : VALID_UNTIL_DATE error\n snip_spdx_id : SNIPPET_SPDX_ID error\n " + "snip_name : SNIPPET_NAME error\n snip_comment : SNIPPET_COMMENT error\n " + "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n snip_cr_text : SNIPPET_CR_TEXT error\n " + "snip_lic_comment : SNIPPET_LICS_COMMENT error\n 
file_spdx_id : SNIPPET_FILE_SPDXID error\n " + "snip_lics_conc : SNIPPET_LICS_CONC error\n snip_lics_info : SNIPPET_LICS_INFO error\n " + "snip_byte_range : SNIPPET_BYTE_RANGE error\n snip_line_range : SNIPPET_LINE_RANGE error\n " + "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n " + "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n " + "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error") + def p_current_element_error(self, p): + if p[1] in EXPECTED_START_TAG_ELEMENT.keys(): + self.initialize_new_current_element(EXPECTED_START_TAG_ELEMENT[p[1]]) + self.current_element["logger"].append( + f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") + + @grammar_rule("license_name : LICS_NAME line_or_no_assertion\n extracted_text : LICS_TEXT text_or_line\n " + "lic_comment : LICS_COMMENT text_or_line\n license_id : LICS_ID LINE\n " + "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n " + "file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none\n " + "file_lics_comment : FILE_LICS_COMMENT text_or_line\n file_comment : FILE_COMMENT text_or_line\n " + "file_conc : FILE_LICS_CONC license_or_no_assertion_or_none\n " + "package_name : PKG_NAME LINE\n description : PKG_DESC text_or_line\n summary : PKG_SUM text_or_line\n " + "source_info : PKG_SRC_INFO text_or_line\n homepage : PKG_HOME line_or_no_assertion_or_none\n " + "download_location : PKG_DOWN line_or_no_assertion_or_none\n originator : PKG_ORIG actor_or_no_assertion\n " + "supplier : PKG_SUPPL actor_or_no_assertion\n pkg_comment : PKG_COMMENT text_or_line\n " + "pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none\n " + "pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none\n pkg_file_name : PKG_FILE_NAME LINE\n " + "pkg_lic_conc : PKG_LICS_CONC license_or_no_assertion_or_none\n package_version : PKG_VERSION LINE\n " + "pkg_lic_comment : PKG_LICS_COMMENT text_or_line\n " + "snip_spdx_id : SNIPPET_SPDX_ID LINE\n snip_name : SNIPPET_NAME LINE\n " + "snip_comment : SNIPPET_COMMENT text_or_line\n " + "snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none\n " + "snip_lic_comment : SNIPPET_LICS_COMMENT text_or_line\n file_spdx_id : SNIPPET_FILE_SPDXID LINE\n " + "snip_lics_conc : SNIPPET_LICS_CONC license_or_no_assertion_or_none\n " + "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n " + "annotation_comment : ANNOTATION_COMMENT text_or_line\n " + + ) + def p_generic_value(self, p): + if p[1] in EXPECTED_START_TAG_ELEMENT.keys(): + self.initialize_new_current_element(EXPECTED_START_TAG_ELEMENT[p[1]]) + if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)): + set_value(p, self.current_element) + @grammar_rule("unknown_tag : UNKNOWN_TAG text_or_line\n | UNKNOWN_TAG DATE\n | UNKNOWN_TAG PERSON_VALUE \n" "| UNKNOWN_TAG") def p_unknown_tag(self, p): @@ -148,7 +214,7 @@ def p_creation_info_value_error(self, p): @grammar_rule("document_comment : DOC_COMMENT text_or_line\n document_namespace : DOC_NAMESPACE LINE\n " "data_license : DOC_LICENSE LINE\n spdx_version : DOC_VERSION LINE\n " - "creator_comment : CREATOR_COMMENT text_or_line") + "creator_comment : CREATOR_COMMENT text_or_line\n doc_name : DOC_NAME LINE") def p_generic_value_creation_info(self, p): set_value(p, self.creation_info) @@ -156,10 +222,6 @@ def p_generic_value_creation_info(self, p): def p_license_list_version(self, p): set_value(p, self.creation_info, 
method_to_apply=Version.from_string) - @grammar_rule("doc_name : DOC_NAME LINE") - def p_doc_name(self, p): - set_value(p, self.creation_info, argument_name="name") - @grammar_rule("ext_doc_ref : EXT_DOC_REF DOC_REF_ID DOC_URI EXT_DOC_REF_CHECKSUM") def p_external_document_ref(self, p): document_ref_id = p[2] @@ -178,168 +240,61 @@ def p_created(self, p): # parsing methods for extracted licensing info - @grammar_rule("license_id : LICS_ID error\n lic_xref : LICS_CRS_REF error\n lic_comment : LICS_COMMENT error\n " - "license_name : LICS_NAME error\n extracted_text : LICS_TEXT error") - def p_extracted_licensing_info_value_error(self, p): - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("license_name : LICS_NAME line_or_no_assertion\n extracted_text : LICS_TEXT text_or_line") - def p_generic_value_extracted_licensing_info(self, p): - self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)) - set_value(p, self.current_element) - - @grammar_rule("license_id : LICS_ID LINE") - def p_extracted_license_id(self, p): - self.initialize_new_current_element(ExtractedLicensingInfo) - set_value(p, self.current_element) - @grammar_rule("lic_xref : LICS_CRS_REF LINE") def p_extracted_cross_reference(self, p): - self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)) - self.current_element.setdefault("cross_references", []).append(p[2]) - - @grammar_rule("lic_comment : LICS_COMMENT text_or_line") - def p_license_comment(self, p): - self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)) - set_value(p, self.current_element, argument_name="comment") + if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)): + self.current_element.setdefault("cross_references", []).append(p[2]) # parsing methods for file - @grammar_rule("file_contrib : FILE_CONTRIB error\n file_notice : FILE_NOTICE error\n " - "file_cr_text : FILE_CR_TEXT error\n file_lics_comment : FILE_LICS_COMMENT error\n " - "file_attribution_text : FILE_ATTRIBUTION_TEXT error\n file_lics_info : FILE_LICS_INFO error\n " - "file_comment : FILE_COMMENT error\n file_checksum : FILE_CHECKSUM error\n " - "file_conc : FILE_LICS_CONC error\n file_type : FILE_TYPE error") - def p_file_value_error(self, p): - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("file_name : FILE_NAME LINE") - def p_file_name(self, p): - self.initialize_new_current_element(File) - set_value(p, self.current_element, argument_name="name") - - @grammar_rule("file_name : FILE_NAME error") - def p_file_name_error(self, p): - self.initialize_new_current_element(File) - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - @grammar_rule("file_contrib : FILE_CONTRIB LINE") def p_file_contributor(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) - self.current_element.setdefault("contributors", []).append(p[2]) - - @grammar_rule("file_notice : FILE_NOTICE text_or_line") - def p_file_notice(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) - set_value(p, self.current_element, argument_name="notice") - - @grammar_rule("file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none") - def p_file_copyright_text(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) - set_value(p, self.current_element, argument_name="copyright_text") - - @grammar_rule("file_lics_comment : FILE_LICS_COMMENT text_or_line") - def p_file_license_comment(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) - set_value(p, self.current_element, argument_name="license_comment") + if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): + self.current_element.setdefault("contributors", []).append(p[2]) @grammar_rule("file_attribution_text : FILE_ATTRIBUTION_TEXT text_or_line") def p_file_attribution_text(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) - self.current_element.setdefault("attribution_texts", []).append(p[2]) + if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): + self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("file_lics_info : FILE_LICS_INFO license_or_no_assertion_or_none") def p_file_license_info(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) + if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): + return if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_in_file"] = p[2] return self.current_element.setdefault("license_info_in_file", []).append(p[2]) - @grammar_rule("file_comment : FILE_COMMENT text_or_line") - def p_file_comment(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) - set_value(p, self.current_element, argument_name="comment") - - @grammar_rule("file_type : FILE_TYPE file_type_value") + @grammar_rule("file_type : FILE_TYPE LINE") def p_file_type(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) - self.current_element.setdefault("file_type", []).append(FileType[p[2]]) - - @grammar_rule( - "file_type_value : SOURCE\n| BINARY\n| ARCHIVE\n | APPLICATION\n | AUDIO\n | IMAGE\n | FILETYPE_TEXT\n| VIDEO\n" - " | DOCUMENTATION\n| SPDX \n| OTHER ") - def p_file_type_value(self, p): - p[0] = p[1] + if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): + return + try: + file_type = FileType[p[2].strip()] + except KeyError: + self.current_element["logger"].append(f"Invalid FileType: {p[2]}. 
Line {p.lineno(1)}") + return + self.current_element.setdefault("file_type", []).append(file_type) @grammar_rule("file_checksum : FILE_CHECKSUM CHECKSUM") def p_file_checksum(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) + if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): + return checksum = parse_checksum(p[2]) self.current_element.setdefault("checksums", []).append(checksum) - @grammar_rule("file_conc : FILE_LICS_CONC license_or_no_assertion_or_none") - def p_file_license_concluded(self, p): - self.check_that_current_element_matches_class_for_value(File, p.lineno(1)) - set_value(p, self.current_element, argument_name="license_concluded") - # parsing methods for package - @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n description : PKG_DESC error\n " - "pkg_comment : PKG_COMMENT error\n summary : PKG_SUM error\n pkg_cr_text : PKG_CPY_TEXT error\n " - "pkg_ext_ref : PKG_EXT_REF error\n pkg_lic_comment : PKG_LICS_COMMENT error\n " - "pkg_lic_decl : PKG_LICS_DECL error\n pkg_lic_ff : PKG_LICS_FFILE error \n " - "pkg_lic_conc : PKG_LICS_CONC error\n source_info : PKG_SRC_INFO error\n homepage : PKG_HOME error\n " - "pkg_checksum : PKG_CHECKSUM error\n verification_code : PKG_VERF_CODE error\n " - "download_location : PKG_DOWN error\n files_analyzed : PKG_FILES_ANALYZED error\n " - "originator : PKG_ORIG error\n supplier : PKG_SUPPL error\n pkg_file_name : PKG_FILE_NAME error\n " - "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n " - "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n " - "valid_until_date : VALID_UNTIL_DATE error") - def p_package_value_error(self, p): - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("description : PKG_DESC text_or_line\n summary : PKG_SUM text_or_line\n " - "source_info : PKG_SRC_INFO text_or_line\n homepage : PKG_HOME line_or_no_assertion_or_none\n " - "download_location : PKG_DOWN line_or_no_assertion_or_none\n " - "originator : PKG_ORIG actor_or_no_assertion\n supplier : PKG_SUPPL actor_or_no_assertion") - def p_generic_package_value(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element) - - @grammar_rule("package_name : PKG_NAME LINE") - def p_package_name(self, p): - self.initialize_new_current_element(Package) - set_value(p, self.current_element, argument_name="name") - - @grammar_rule("package_name : PKG_NAME error") - def p_package_name_error(self, p): - self.initialize_new_current_element(Package) - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - - @grammar_rule("pkg_comment : PKG_COMMENT text_or_line") - def p_pkg_comment(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element, argument_name="comment") - @grammar_rule("pkg_attribution_text : PKG_ATTRIBUTION_TEXT text_or_line") def p_pkg_attribution_text(self, p): self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) self.current_element.setdefault("attribution_texts", []).append(p[2]) - @grammar_rule("pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none") - def p_pkg_copyright_text(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element, argument_name="copyright_text") - @grammar_rule("pkg_ext_ref : PKG_EXT_REF LINE PKG_EXT_REF_COMMENT text_or_line\n | PKG_EXT_REF LINE") def p_pkg_external_refs(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) + if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): + return try: category, reference_type, locator = p[2].split(" ") except ValueError: @@ -352,7 +307,8 @@ def p_pkg_external_refs(self, p): try: category = ExternalPackageRefCategory[category.replace("-", "_")] except KeyError: - self.current_element["logger"].append(f"Invalid ExternalPackageRefCategory: {category}") + self.current_element["logger"].append( + f"Invalid ExternalPackageRefCategory: {category}. Line: {p.lineno(1)}") return try: external_package_ref = construct_or_raise_parsing_error(ExternalPackageRef, @@ -365,39 +321,28 @@ def p_pkg_external_refs(self, p): return self.current_element.setdefault("external_references", []).append(external_package_ref) - @grammar_rule("pkg_lic_comment : PKG_LICS_COMMENT text_or_line") - def p_pkg_license_comment(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element, argument_name="license_comment") - - @grammar_rule("pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none") - def p_pkg_license_declared(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element, argument_name="license_declared") - @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE license_or_no_assertion_or_none") def p_pkg_license_info_from_file(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) + if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): + return if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_from_files"] = p[2] else: self.current_element.setdefault("license_info_from_files", []).append(p[2]) - @grammar_rule("pkg_lic_conc : PKG_LICS_CONC license_or_no_assertion_or_none") - def p_pkg_license_concluded(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element, argument_name="license_concluded") - @grammar_rule("pkg_checksum : PKG_CHECKSUM CHECKSUM") def p_pkg_checksum(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) + if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): + return checksum = parse_checksum(p[2]) self.current_element.setdefault("checksums", []).append(checksum) @grammar_rule("verification_code : PKG_VERF_CODE LINE") def p_pkg_verification_code(self, p): - 
self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - if str(p.slice[0]) in self.current_element: + if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): + return + + if "verification_code" in self.current_element: self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") return verif_code_regex = re.compile(r"([0-9a-f]{40})\s*(\(excludes:\s*(.+)\))?", re.UNICODE) @@ -412,102 +357,39 @@ def p_pkg_verification_code(self, p): excluded_files = None if match.group(verif_code_exc_files_grp): excluded_files = match.group(verif_code_exc_files_grp).split(",") - self.current_element[str(p.slice[0])] = PackageVerificationCode(value, excluded_files) + self.current_element["verification_code"] = PackageVerificationCode(value, excluded_files) @grammar_rule("files_analyzed : PKG_FILES_ANALYZED LINE") def p_pkg_files_analyzed(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - if str(p.slice[0]) in self.current_element: + if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): + return + if "files_analyzed" in self.current_element: self.current_element["logger"].append(f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") return - self.current_element[str(p.slice[0])] = p[2] in ['true', 'True'] - - @grammar_rule("pkg_file_name : PKG_FILE_NAME LINE") - def p_pkg_file_name(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element, argument_name="file_name") - - @grammar_rule("package_version : PKG_VERSION LINE") - def p_package_version(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element, argument_name="version") + self.current_element["files_analyzed"] = p[2] in ['true', 'True'] - @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE primary_package_purpose_value") + @grammar_rule("primary_package_purpose : PRIMARY_PACKAGE_PURPOSE LINE") def p_primary_package_purpose(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")]) - - @grammar_rule("primary_package_purpose_value : APPLICATION\n | FRAMEWORK\n | LIBRARY\n | CONTAINER\n " - "| OPERATING_SYSTEM \n | DEVICE \n| FIRMWARE\n | SOURCE\n | ARCHIVE\n | FILE\n | INSTALL\n | OTHER") - def p_primary_package_purpose_value(self, p): - p[0] = p[1] + if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): + set_value(p, self.current_element, method_to_apply=lambda x: PackagePurpose[x.replace("-", "_")]) @grammar_rule("built_date : BUILT_DATE DATE\n release_date : RELEASE_DATE DATE\n " "valid_until_date : VALID_UNTIL_DATE DATE") def p_package_dates(self, p): - self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) - set_value(p, self.current_element, method_to_apply=datetime_from_str) + if self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): + set_value(p, self.current_element, method_to_apply=datetime_from_str) # parsing methods for snippet - @grammar_rule("snip_name : SNIPPET_NAME error\n snip_comment : SNIPPET_COMMENT error\n " - "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n snip_cr_text : SNIPPET_CR_TEXT error\n " - "snip_lic_comment : SNIPPET_LICS_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n " - "snip_lics_conc 
: SNIPPET_LICS_CONC error\n snip_lics_info : SNIPPET_LICS_INFO error\n " - "snip_byte_range : SNIPPET_BYTE_RANGE error\n snip_line_range : SNIPPET_LINE_RANGE error\n ") - def p_snippet_value_error(self, p): - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID LINE") - def p_snippet_spdx_id(self, p): - self.initialize_new_current_element(Snippet) - set_value(p, self.current_element, argument_name="spdx_id") - - @grammar_rule("snip_spdx_id : SNIPPET_SPDX_ID error") - def p_snippet_spdx_id_error(self, p): - self.initialize_new_current_element(Snippet) - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - - @grammar_rule("snip_name : SNIPPET_NAME LINE") - def p_snippet_name(self, p): - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) - set_value(p, self.current_element, argument_name="name") - - @grammar_rule("snip_comment : SNIPPET_COMMENT text_or_line") - def p_snippet_comment(self, p): - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) - set_value(p, self.current_element, argument_name="comment") - @grammar_rule("snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT text_or_line") def p_snippet_attribution_text(self, p): - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) - self.current_element.setdefault("attribution_texts", []).append(p[2]) - - @grammar_rule("snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none") - def p_snippet_copyright_text(self, p): - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) - set_value(p, self.current_element, argument_name="copyright_text") - - @grammar_rule("snip_lic_comment : SNIPPET_LICS_COMMENT text_or_line") - def p_snippet_license_comment(self, p): - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) - set_value(p, self.current_element, argument_name="license_comment") - - @grammar_rule("file_spdx_id : SNIPPET_FILE_SPDXID LINE") - def p_snippet_from_file_spdxid(self, p): - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) - set_value(p, self.current_element) - - @grammar_rule("snip_lics_conc : SNIPPET_LICS_CONC license_or_no_assertion_or_none") - def p_snippet_concluded_license(self, p): - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) - set_value(p, self.current_element, argument_name="license_concluded") + if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)): + self.current_element.setdefault("attribution_texts", []).append(p[2]) @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO license_or_no_assertion_or_none") def p_snippet_license_info(self, p): - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) + if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)): + return if p[2] == SpdxNone() or p[2] == SpdxNoAssertion(): self.current_element["license_info_in_snippet"] = p[2] else: @@ -515,13 +397,10 @@ def p_snippet_license_info(self, p): @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE\n snip_line_range : SNIPPET_LINE_RANGE LINE") def p_snippet_range(self, p): - if p[1] == "SnippetByteRange": - argument_name = "byte_range" - elif p[1] == "SnippetLineRange": - argument_name = "line_range" - else: + if not 
self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)): return - self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)) + + argument_name = TAG_DATA_MODEL_FIELD[p[1]][1] if argument_name in self.current_element: self.current_element["logger"].append( f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") @@ -536,45 +415,20 @@ def p_snippet_range(self, p): # parsing methods for annotation - @grammar_rule("annotation_date : ANNOTATION_DATE error\n annotation_comment : ANNOTATION_COMMENT error\n " - "annotation_type : ANNOTATION_TYPE error\n annotation_spdx_id : ANNOTATION_SPDX_ID error") - def p_annotation_value_error(self, p): - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE") def p_annotator(self, p): self.initialize_new_current_element(Annotation) set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor) - @grammar_rule("annotator : ANNOTATOR error") - def p_annotator_error(self, p): - self.initialize_new_current_element(Annotation) - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("annotation_date : ANNOTATION_DATE DATE") def p_annotation_date(self, p): - self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)) - set_value(p, self.current_element, method_to_apply=datetime_from_str) - - @grammar_rule("annotation_comment : ANNOTATION_COMMENT text_or_line") - def p_annotation_comment(self, p): - self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)) - set_value(p, self.current_element) + if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)): + set_value(p, self.current_element, method_to_apply=datetime_from_str) - @grammar_rule("annotation_type : ANNOTATION_TYPE annotation_type_value") + @grammar_rule("annotation_type : ANNOTATION_TYPE LINE") def p_annotation_type(self, p): - self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)) - set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x]) - - @grammar_rule("annotation_type_value : OTHER\n| REVIEW") - def p_annotation_type_value(self, p): - p[0] = p[1] - - @grammar_rule("annotation_spdx_id : ANNOTATION_SPDX_ID LINE") - def p_annotation_spdx_id(self, p): - set_value(p, self.current_element, argument_name="spdx_id") + if self.check_that_current_element_matches_class_for_value(Annotation, p.lineno(1)): + set_value(p, self.current_element, method_to_apply=lambda x: AnnotationType[x]) # parsing methods for relationship @@ -602,12 +456,6 @@ def p_relationship(self, p): if len(p) == 5: self.current_element["comment"] = p[4] - @grammar_rule("relationship : RELATIONSHIP error") - def p_relationship_error(self, p): - self.initialize_new_current_element(Relationship) - self.current_element["logger"].append( - f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") - @grammar_rule("relationship_value : DOC_REF_ID LINE") def p_relationship_value_with_doc_ref(self, p): @@ -646,12 +494,14 @@ def initialize_new_current_element(self, clazz: Any): self.construct_current_element() self.current_element["class"] = clazz - def check_that_current_element_matches_class_for_value(self, expected_class, line_number): + def check_that_current_element_matches_class_for_value(self, expected_class, line_number) -> bool: if "class" not in self.current_element or expected_class != self.current_element["class"]: self.logger.append( f"Element {expected_class.__name__} is not the current element in scope, probably the expected tag to " f"start the element ({ELEMENT_EXPECTED_START_TAG[expected_class.__name__]}) is missing. " f"Line: {line_number}") + return False + return True def construct_current_element(self): if "class" not in self.current_element: diff --git a/tests/spdx/parser/tagvalue/test_annotation_parser.py b/tests/spdx/parser/tagvalue/test_annotation_parser.py index 7df26534b..cf9ee7614 100644 --- a/tests/spdx/parser/tagvalue/test_annotation_parser.py +++ b/tests/spdx/parser/tagvalue/test_annotation_parser.py @@ -45,9 +45,9 @@ def test_parse_annotation(): "required positional arguments: 'spdx_id', 'annotation_type', " "'annotation_date', and 'annotation_comment'"), ('Annotator: Person: Jane Doe()\nAnnotationType: SOURCE\nAnnotationDate: 201001-2912:23', - "Error while parsing Annotation: ['Error while parsing AnnotationType: Token " - "did not match specified grammar rule. Line: 2', 'Error while parsing " - "AnnotationDate: Token did not match specified grammar rule. Line: 3']"), + "Error while parsing Annotation: ['Invalid AnnotationType: SOURCE. Line: 2', " + "'Error while parsing AnnotationDate: Token did not match specified grammar " + "rule. Line: 3']"), ('Annotator: Jane Doe()\nAnnotationDate: 201001-29T18:30:22Z\n' 'AnnotationComment: Document level annotation\nAnnotationType: OTHER\nSPDXREF: SPDXRef-DOCUMENT', "Error while parsing Annotation: ['Error while parsing Annotator: Token did " diff --git a/tests/spdx/parser/tagvalue/test_file_parser.py b/tests/spdx/parser/tagvalue/test_file_parser.py index 7ca5c4118..c6190850a 100644 --- a/tests/spdx/parser/tagvalue/test_file_parser.py +++ b/tests/spdx/parser/tagvalue/test_file_parser.py @@ -63,6 +63,5 @@ def test_parse_invalid_file(): with pytest.raises(SPDXParsingError) as err: parser.parse(file_str) - assert err.value.get_messages() == ["Error while parsing File: ['Error while parsing FileType: Token did not " - "match specified grammar rule. Line: 3', 'Error while parsing FileChecksum: " - "Token did not match specified grammar rule. Line: 5']"] + assert err.value.get_messages() == ["Error while parsing File: ['Invalid FileType: SOUCE. Line 3', 'Error while " + "parsing FileChecksum: Token did not match specified grammar rule. Line: 5']"] diff --git a/tests/spdx/parser/tagvalue/test_package_parser.py b/tests/spdx/parser/tagvalue/test_package_parser.py index 2e83f75ad..02e9dea2a 100644 --- a/tests/spdx/parser/tagvalue/test_package_parser.py +++ b/tests/spdx/parser/tagvalue/test_package_parser.py @@ -91,7 +91,7 @@ def test_parse_package(): 'category, reference_type and locator. Line: 2"]'), ('PackageName: TestPackage\nExternalRef: category reference locator', "Error while parsing Package: ['Invalid ExternalPackageRefCategory: " - "category']"), + "category. 
Line: 2']"), ('SPDXID:SPDXRef-DOCUMENT\nPackageName: TestPackage\nSPDXID:SPDXRef-Package\n' 'PackageDownloadLocation: download.com\nPackageVerificationCode: category reference locator', "Error while parsing Package: ['Error while parsing PackageVerificationCode: " diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py index bf8b821fa..adc9a2ecb 100644 --- a/tests/spdx/parser/tagvalue/test_relationship_parser.py +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -46,11 +46,7 @@ def test_parse_relationship(relationship_str, expected_relationship): ['Error while parsing Relationship: ["Relationship couldn\'t be split in spdx_element_id, ' 'relationship_type and related_spdx_element. Line: 1"]']), ("Relationship: spdx_id IS spdx_id", - ["Error while parsing Relationship: ['Invalid RelationshipType IS. Line: 1']"]), - ("Relationship: spdx_id IS spdx_id\nRelationshipComment: SOURCE", - ["Error while parsing Relationship: ['Error while parsing Relationship: Token " - "did not match specified grammar rule. Line: 1']"]) - ]) + ["Error while parsing Relationship: ['Invalid RelationshipType IS. Line: 1']"])]) def test_parse_invalid_relationship(relationship_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: diff --git a/tests/spdx/parser/tagvalue/test_tag_value_lexer.py b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py index ce6b9a159..bd82fab3b 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_lexer.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py @@ -86,9 +86,9 @@ def test_tokenization_of_file(lexer): token_assert_helper(lexer.token(), 'SPDX_ID', 'SPDXID', 2) token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-File', 2) token_assert_helper(lexer.token(), 'FILE_TYPE', 'FileType', 3) - token_assert_helper(lexer.token(), 'SOURCE', 'SOURCE', 3) + token_assert_helper(lexer.token(), 'LINE', 'SOURCE', 3) token_assert_helper(lexer.token(), 'FILE_TYPE', 'FileType', 4) - token_assert_helper(lexer.token(), 'FILETYPE_TEXT', 'TEXT', 4) + token_assert_helper(lexer.token(), 'LINE', 'TEXT', 4) token_assert_helper(lexer.token(), 'FILE_CHECKSUM', 'FileChecksum', 5) token_assert_helper(lexer.token(), 'CHECKSUM', 'SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', 5) token_assert_helper(lexer.token(), 'FILE_LICS_CONC', 'LicenseConcluded', 6) @@ -202,7 +202,7 @@ def test_tokenization_of_package(lexer): token_assert_helper(lexer.token(), 'PKG_EXT_REF_COMMENT', 'ExternalRefComment', 22) token_assert_helper(lexer.token(), 'TEXT', 'Some comment about the package.', 22) token_assert_helper(lexer.token(), 'PRIMARY_PACKAGE_PURPOSE', 'PrimaryPackagePurpose', 23) - token_assert_helper(lexer.token(), 'OPERATING_SYSTEM', 'OPERATING-SYSTEM', 23) + token_assert_helper(lexer.token(), 'LINE', 'OPERATING-SYSTEM', 23) token_assert_helper(lexer.token(), 'BUILT_DATE', 'BuiltDate', 24) token_assert_helper(lexer.token(), 'DATE', '2020-01-01T12:00:00Z', 24) token_assert_helper(lexer.token(), 'RELEASE_DATE', 'ReleaseDate', 25) @@ -272,7 +272,7 @@ def test_tokenization_of_annotation(lexer): token_assert_helper(lexer.token(), 'ANNOTATION_COMMENT', 'AnnotationComment', 3) token_assert_helper(lexer.token(), 'TEXT', 'Document level annotation', 3) token_assert_helper(lexer.token(), 'ANNOTATION_TYPE', 'AnnotationType', 4) - token_assert_helper(lexer.token(), 'OTHER', 'OTHER', 4) + token_assert_helper(lexer.token(), 'LINE', 'OTHER', 4) token_assert_helper(lexer.token(), 'ANNOTATION_SPDX_ID', 'SPDXREF', 5) 
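# Enum-like values such as 'OTHER' are now lexed as generic LINE tokens; the parser
# validates them against AnnotationType, so the lexer no longer needs keyword tokens for them.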
token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-DOCUMENT', 5) diff --git a/tests/spdx/parser/tagvalue/test_tag_value_parser.py b/tests/spdx/parser/tagvalue/test_tag_value_parser.py index 9dde9dacf..38755a24a 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_parser.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_parser.py @@ -47,7 +47,7 @@ def test_tag_value_parser(): def test_building_contains_relationship(): parser = Parser() document_str = "\n".join( - [DOCUMENT_STR, "SPDXID: SPDXRef-DOCUMENT", "FileName: File without package", "SPDXID: SPDXRef-File", + [DOCUMENT_STR, "FileName: File without package", "SPDXID: SPDXRef-File", "FileChecksum: SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", "PackageName: Package with two files", "SPDXID: SPDXRef-Package-with-two-files", "PackageDownloadLocation: https://download.com", @@ -66,3 +66,17 @@ def test_building_contains_relationship(): Relationship("SPDXRef-Package-with-two-files", RelationshipType.CONTAINS, "SPDXRef-File-in-Package"), Relationship("SPDXRef-Package-with-two-files", RelationshipType.CONTAINS, "SPDXRef-Second-File-in-Package"), Relationship("SPDXRef-Package-with-one-file", RelationshipType.CONTAINS, "SPDXRef-File-in-different-Package")] + + +def test_document_with_mixed_values(): + parser = Parser() + document_str = "\n".join( + ["SPDXID:SPDXRef-DOCUMENT", "FileName: File without package", "SPDXID: SPDXRef-File", + "PackageDownloadLocation: https://download.com", + "FileChecksum: SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759"]) + + with pytest.raises(SPDXParsingError) as err: + parser.parse(document_str) + + assert err.value.get_messages() == ["Element Package is not the current element in scope, probably the expected " + "tag to start the element (PackageName) is missing. Line: 4"] From c97217721f14c5899fd505b0b3d18209d7deebe7 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 8 Mar 2023 15:26:29 +0100 Subject: [PATCH 41/43] squashed review commits concerning structure, double quotes and naming [review] use double quotes [review] change file structure of tag value parser [review] rename Signed-off-by: Meret Behrens --- .gitignore | 2 +- src/spdx/parser/parse_anything.py | 2 +- .../parser/tagvalue/{lexer => }/__init__.py | 0 .../tagvalue/{parser => }/helper_methods.py | 0 .../tagvalue/{lexer/tagvalue.py => lexer.py} | 72 +-- .../{parser/tagvalue.py => parser.py} | 141 +++--- src/spdx/parser/tagvalue/parser/__init__.py | 0 .../tagvalue/{parser => }/tagvalue_parser.py | 2 +- .../parser/tagvalue/test_annotation_parser.py | 36 +- .../tagvalue/test_creation_info_parser.py | 84 ++-- .../test_extracted_licensing_info_parser.py | 54 +-- .../spdx/parser/tagvalue/test_file_parser.py | 54 +-- .../parser/tagvalue/test_helper_methods.py | 2 +- .../parser/tagvalue/test_package_parser.py | 94 ++-- .../tagvalue/test_relationship_parser.py | 16 +- .../parser/tagvalue/test_snippet_parser.py | 54 +-- .../parser/tagvalue/test_tag_value_lexer.py | 424 +++++++++--------- .../parser/tagvalue/test_tag_value_parser.py | 4 +- .../writer/tagvalue/test_tagvalue_writer.py | 2 +- 19 files changed, 526 insertions(+), 517 deletions(-) rename src/spdx/parser/tagvalue/{lexer => }/__init__.py (100%) rename src/spdx/parser/tagvalue/{parser => }/helper_methods.py (100%) rename src/spdx/parser/tagvalue/{lexer/tagvalue.py => lexer.py} (76%) rename src/spdx/parser/tagvalue/{parser/tagvalue.py => parser.py} (78%) delete mode 100644 src/spdx/parser/tagvalue/parser/__init__.py rename src/spdx/parser/tagvalue/{parser => }/tagvalue_parser.py (93%) diff --git 
a/.gitignore b/.gitignore index 5ef28e630..201c079bc 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ __pycache__/ /build/ /dist/ /tmp/ -src/spdx/parser/tagvalue/parser/parsetab.py +src/spdx/parser/tagvalue/parsetab.py /.cache/ .tox diff --git a/src/spdx/parser/parse_anything.py b/src/spdx/parser/parse_anything.py index b2d1dfd87..8b156cf34 100644 --- a/src/spdx/parser/parse_anything.py +++ b/src/spdx/parser/parse_anything.py @@ -11,7 +11,7 @@ from spdx.formats import file_name_to_format, FileFormat from spdx.parser.json import json_parser from spdx.parser.rdf import rdf_parser -from spdx.parser.tagvalue.parser import tagvalue_parser +from spdx.parser.tagvalue import tagvalue_parser from spdx.parser.xml import xml_parser from spdx.parser.yaml import yaml_parser diff --git a/src/spdx/parser/tagvalue/lexer/__init__.py b/src/spdx/parser/tagvalue/__init__.py similarity index 100% rename from src/spdx/parser/tagvalue/lexer/__init__.py rename to src/spdx/parser/tagvalue/__init__.py diff --git a/src/spdx/parser/tagvalue/parser/helper_methods.py b/src/spdx/parser/tagvalue/helper_methods.py similarity index 100% rename from src/spdx/parser/tagvalue/parser/helper_methods.py rename to src/spdx/parser/tagvalue/helper_methods.py diff --git a/src/spdx/parser/tagvalue/lexer/tagvalue.py b/src/spdx/parser/tagvalue/lexer.py similarity index 76% rename from src/spdx/parser/tagvalue/lexer/tagvalue.py rename to src/spdx/parser/tagvalue/lexer.py index 3e4bb3569..906e26067 100644 --- a/src/spdx/parser/tagvalue/lexer/tagvalue.py +++ b/src/spdx/parser/tagvalue/lexer.py @@ -28,7 +28,7 @@ class SPDXLexer(object): "Creator": "CREATOR", "Created": "CREATED", "CreatorComment": "CREATOR_COMMENT", - "LicenseListVersion": "LIC_LIST_VER", + "LicenseListVersion": "LICENSE_LIST_VERSION", # Annotation fields "Annotator": "ANNOTATOR", "AnnotationDate": "ANNOTATION_DATE", @@ -41,25 +41,25 @@ class SPDXLexer(object): # Package fields "PackageName": "PKG_NAME", "PackageVersion": "PKG_VERSION", - "PackageDownloadLocation": "PKG_DOWN", + "PackageDownloadLocation": "PKG_DOWWNLOAD_LOCATION", "FilesAnalyzed": "PKG_FILES_ANALYZED", - "PackageSummary": "PKG_SUM", - "PackageSourceInfo": "PKG_SRC_INFO", + "PackageSummary": "PKG_SUMMARY", + "PackageSourceInfo": "PKG_SOURCE_INFO", "PackageFileName": "PKG_FILE_NAME", - "PackageSupplier": "PKG_SUPPL", - "PackageOriginator": "PKG_ORIG", + "PackageSupplier": "PKG_SUPPLIER", + "PackageOriginator": "PKG_ORIGINATOR", "PackageChecksum": "PKG_CHECKSUM", - "PackageVerificationCode": "PKG_VERF_CODE", - "PackageDescription": "PKG_DESC", + "PackageVerificationCode": "PKG_VERIFICATION_CODE", + "PackageDescription": "PKG_DESCRIPTION", "PackageComment": "PKG_COMMENT", - "PackageLicenseDeclared": "PKG_LICS_DECL", - "PackageLicenseConcluded": "PKG_LICS_CONC", - "PackageLicenseInfoFromFiles": "PKG_LICS_FFILE", - "PackageLicenseComments": "PKG_LICS_COMMENT", - "PackageCopyrightText": "PKG_CPY_TEXT", - "PackageHomePage": "PKG_HOME", - "ExternalRef": "PKG_EXT_REF", - "ExternalRefComment": "PKG_EXT_REF_COMMENT", + "PackageLicenseDeclared": "PKG_LICENSE_DECLARED", + "PackageLicenseConcluded": "PKG_LICENSE_CONCLUDED", + "PackageLicenseInfoFromFiles": "PKG_LICENSE_INFO", + "PackageLicenseComments": "PKG_LICENSE_COMMENT", + "PackageCopyrightText": "PKG_COPYRIGHT_TEXT", + "PackageHomePage": "PKG_HOMEPAGE", + "ExternalRef": "PKG_EXTERNAL_REF", + "ExternalRefComment": "PKG_EXTERNAL_REF_COMMENT", "PackageAttributionText": "PKG_ATTRIBUTION_TEXT", "PrimaryPackagePurpose": "PRIMARY_PACKAGE_PURPOSE", "BuiltDate": 
"BUILT_DATE", @@ -69,29 +69,29 @@ class SPDXLexer(object): "FileName": "FILE_NAME", "FileType": "FILE_TYPE", "FileChecksum": "FILE_CHECKSUM", - "LicenseConcluded": "FILE_LICS_CONC", - "LicenseInfoInFile": "FILE_LICS_INFO", - "FileCopyrightText": "FILE_CR_TEXT", - "LicenseComments": "FILE_LICS_COMMENT", + "LicenseConcluded": "FILE_LICENSE_CONCLUDED", + "LicenseInfoInFile": "FILE_LICENSE_INFO", + "FileCopyrightText": "FILE_COPYRIGHT_TEXT", + "LicenseComments": "FILE_LICENSE_COMMENT", "FileComment": "FILE_COMMENT", "FileNotice": "FILE_NOTICE", - "FileContributor": "FILE_CONTRIB", + "FileContributor": "FILE_CONTRIBUTOR", "FileAttributionText": "FILE_ATTRIBUTION_TEXT", # ExtractedLicensingInfo fields - "LicenseID": "LICS_ID", - "ExtractedText": "LICS_TEXT", - "LicenseName": "LICS_NAME", - "LicenseCrossReference": "LICS_CRS_REF", - "LicenseComment": "LICS_COMMENT", + "LicenseID": "LICENSE_ID", + "ExtractedText": "LICENSE_TEXT", + "LicenseName": "LICENSE_NAME", + "LicenseCrossReference": "LICENSE_CROSS_REF", + "LicenseComment": "LICENSE_COMMENT", # Snippet fields "SnippetSPDXID": "SNIPPET_SPDX_ID", "SnippetName": "SNIPPET_NAME", "SnippetComment": "SNIPPET_COMMENT", - "SnippetCopyrightText": "SNIPPET_CR_TEXT", - "SnippetLicenseComments": "SNIPPET_LICS_COMMENT", + "SnippetCopyrightText": "SNIPPET_COPYRIGHT_TEXT", + "SnippetLicenseComments": "SNIPPET_LICENSE_COMMENT", "SnippetFromFileSPDXID": "SNIPPET_FILE_SPDXID", - "SnippetLicenseConcluded": "SNIPPET_LICS_CONC", - "LicenseInfoInSnippet": "SNIPPET_LICS_INFO", + "SnippetLicenseConcluded": "SNIPPET_LICENSE_CONCLUDED", + "LicenseInfoInSnippet": "SNIPPET_LICENSE_INFO", "SnippetAttributionText": "SNIPPET_ATTRIBUTION_TEXT", "SnippetByteRange": "SNIPPET_BYTE_RANGE", "SnippetLineRange": "SNIPPET_LINE_RANGE", @@ -105,13 +105,13 @@ class SPDXLexer(object): "TEXT", "TOOL_VALUE", "UNKNOWN_TAG", - "ORG_VALUE", + "ORGANIZATION_VALUE", "PERSON_VALUE", "DATE", "LINE", "CHECKSUM", - "DOC_REF_ID", - "DOC_URI", + "EXT_DOC_REF_ID", + "EXT_DOC_URI", "EXT_DOC_REF_CHECKSUM", ] + list(reserved.values()) @@ -146,12 +146,12 @@ def t_CHECKSUM(self, t): return t @TOKEN(r":\s*DocumentRef-([A-Za-z0-9\+\.\-]+)") - def t_DOC_REF_ID(self, t): + def t_EXT_DOC_REF_ID(self, t): t.value = t.value[1:].strip() return t @TOKEN(r"\s*((ht|f)tps?:\/\/\S*)") - def t_DOC_URI(self, t): + def t_EXT_DOC_URI(self, t): t.value = t.value.strip() return t @@ -166,7 +166,7 @@ def t_TOOL_VALUE(self, t): return t @TOKEN(r":\s*Organization:.+") - def t_ORG_VALUE(self, t): + def t_ORGANIZATION_VALUE(self, t): t.value = t.value[1:].strip() return t diff --git a/src/spdx/parser/tagvalue/parser/tagvalue.py b/src/spdx/parser/tagvalue/parser.py similarity index 78% rename from src/spdx/parser/tagvalue/parser/tagvalue.py rename to src/spdx/parser/tagvalue/parser.py index 9ae442f3e..8fbc3d019 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue.py +++ b/src/spdx/parser/tagvalue/parser.py @@ -34,8 +34,8 @@ from spdx.parser.error import SPDXParsingError from spdx.parser.logger import Logger from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages -from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer -from spdx.parser.tagvalue.parser.helper_methods import grammar_rule, str_from_text, parse_checksum, set_value, \ +from spdx.parser.tagvalue.lexer import SPDXLexer +from spdx.parser.tagvalue.helper_methods import grammar_rule, str_from_text, parse_checksum, set_value, \ TAG_DATA_MODEL_FIELD CLASS_MAPPING = dict(File="files", Annotation="annotations", 
Relationship="relationships", Snippet="snippets", @@ -78,53 +78,59 @@ def p_start_attrib(self, p): "attrib : spdx_version\n| spdx_id\n| data_license\n| doc_name\n| document_comment\n| document_namespace\n| " "creator\n| created\n| creator_comment\n| license_list_version\n| ext_doc_ref\n" # attributes for file - "| file_name\n| file_type\n| file_checksum\n| file_conc\n| file_lics_info\n| file_cr_text\n" - "| file_lics_comment\n| file_attribution_text\n| file_notice\n| file_comment\n| file_contrib\n" + "| file_name\n| file_type\n| file_checksum\n| file_license_concluded\n| file_license_info\n" + "| file_copyright_text\n| file_license_comment\n| file_attribution_text\n| file_notice\n| file_comment\n" + "| file_contributor\n" # attributes for annotation "| annotator\n| annotation_date\n| annotation_comment\n| annotation_type\n| annotation_spdx_id\n" # attributes for relationship "| relationship\n" # attributes for snippet - "| snip_spdx_id\n| snip_name\n| snip_comment\n| snippet_attribution_text\n| snip_cr_text\n" - "| snip_lic_comment\n| file_spdx_id\n| snip_lics_conc\n| snip_lics_info\n| snip_byte_range\n" - "| snip_line_range\n" + "| snippet_spdx_id\n| snippet_name\n| snippet_comment\n| snippet_attribution_text\n| snippet_copyright_text\n" + "| snippet_license_comment\n| file_spdx_id\n| snippet_license_concluded\n| snippet_license_info\n" + "| snippet_byte_range\n| snippet_line_range\n" # attributes for package "| package_name\n| package_version\n| download_location\n| files_analyzed\n| homepage\n" "| summary\n| source_info\n| pkg_file_name\n| supplier\n| originator\n| pkg_checksum\n" - "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_lic_decl\n| pkg_lic_conc\n" - "| pkg_lic_ff\n| pkg_lic_comment\n| pkg_cr_text\n| pkg_ext_ref\n| primary_package_purpose\n" - "| built_date\n| release_date\n| valid_until_date\n" + "| verification_code\n| description\n| pkg_comment\n| pkg_attribution_text\n| pkg_license_declared\n" + "| pkg_license_concluded\n| pkg_license_info\n| pkg_license_comment\n| pkg_copyright_text\n" + "| pkg_external_ref\n| primary_package_purpose\n| built_date\n| release_date\n| valid_until_date\n" # attributes for extracted licensing info - "| license_id\n| extracted_text\n| license_name\n| lic_xref\n| lic_comment\n" + "| license_id\n| extracted_text\n| license_name\n| license_cross_ref\n| lic_comment\n" "| unknown_tag ") def p_attrib(self, p): pass # general parsing methods - @grammar_rule("license_id : LICS_ID error\n lic_xref : LICS_CRS_REF error\n lic_comment : LICS_COMMENT error\n " - "license_name : LICS_NAME error\n extracted_text : LICS_TEXT error\n " - "file_name : FILE_NAME error\n file_contrib : FILE_CONTRIB error\n file_notice : FILE_NOTICE error\n " - "file_cr_text : FILE_CR_TEXT error\n file_lics_comment : FILE_LICS_COMMENT error\n " - "file_attribution_text : FILE_ATTRIBUTION_TEXT error\n file_lics_info : FILE_LICS_INFO error\n " - "file_comment : FILE_COMMENT error\n file_checksum : FILE_CHECKSUM error\n " - "file_conc : FILE_LICS_CONC error\n file_type : FILE_TYPE error\n " + @grammar_rule("license_id : LICENSE_ID error\n license_cross_ref : LICENSE_CROSS_REF error\n " + "lic_comment : LICENSE_COMMENT error\n license_name : LICENSE_NAME error\n " + "extracted_text : LICENSE_TEXT error\n " + "file_name : FILE_NAME error\n file_contributor : FILE_CONTRIBUTOR error\n " + "file_notice : FILE_NOTICE error\n file_copyright_text : FILE_COPYRIGHT_TEXT error\n " + "file_license_comment : FILE_LICENSE_COMMENT error\n " + "file_license_info : 
FILE_LICENSE_INFO error\n file_comment : FILE_COMMENT error\n " + "file_checksum : FILE_CHECKSUM error\n file_license_concluded : FILE_LICENSE_CONCLUDED error\n " + "file_type : FILE_TYPE error\n file_attribution_text : FILE_ATTRIBUTION_TEXT error\n " "package_name : PKG_NAME error\n pkg_attribution_text : PKG_ATTRIBUTION_TEXT error\n " - "description : PKG_DESC error\n pkg_comment : PKG_COMMENT error\n summary : PKG_SUM error\n " - "pkg_cr_text : PKG_CPY_TEXT error\n pkg_ext_ref : PKG_EXT_REF error\n " - "pkg_lic_comment : PKG_LICS_COMMENT error\n pkg_lic_decl : PKG_LICS_DECL error\n " - "pkg_lic_ff : PKG_LICS_FFILE error \n pkg_lic_conc : PKG_LICS_CONC error\n " - "source_info : PKG_SRC_INFO error\n homepage : PKG_HOME error\n pkg_checksum : PKG_CHECKSUM error\n " - "verification_code : PKG_VERF_CODE error\n download_location : PKG_DOWN error\n " - "files_analyzed : PKG_FILES_ANALYZED error\n originator : PKG_ORIG error\n " - "supplier : PKG_SUPPL error\n pkg_file_name : PKG_FILE_NAME error\n " + "description : PKG_DESCRIPTION error\n pkg_comment : PKG_COMMENT error\n " + "summary : PKG_SUMMARY error\n pkg_copyright_text : PKG_COPYRIGHT_TEXT error\n " + "pkg_external_ref : PKG_EXTERNAL_REF error\n pkg_license_comment : PKG_LICENSE_COMMENT error\n " + "pkg_license_declared : PKG_LICENSE_DECLARED error\n pkg_license_info : PKG_LICENSE_INFO error \n " + "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n " + "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n " + "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n " + "download_location : PKG_DOWWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n " + "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n " "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n " "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n " - "valid_until_date : VALID_UNTIL_DATE error\n snip_spdx_id : SNIPPET_SPDX_ID error\n " - "snip_name : SNIPPET_NAME error\n snip_comment : SNIPPET_COMMENT error\n " - "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n snip_cr_text : SNIPPET_CR_TEXT error\n " - "snip_lic_comment : SNIPPET_LICS_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n " - "snip_lics_conc : SNIPPET_LICS_CONC error\n snip_lics_info : SNIPPET_LICS_INFO error\n " - "snip_byte_range : SNIPPET_BYTE_RANGE error\n snip_line_range : SNIPPET_LINE_RANGE error\n " + "valid_until_date : VALID_UNTIL_DATE error\n snippet_spdx_id : SNIPPET_SPDX_ID error\n " + "snippet_name : SNIPPET_NAME error\n snippet_comment : SNIPPET_COMMENT error\n " + "snippet_attribution_text : SNIPPET_ATTRIBUTION_TEXT error\n " + "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT error\n " + "snippet_license_comment : SNIPPET_LICENSE_COMMENT error\n file_spdx_id : SNIPPET_FILE_SPDXID error\n " + "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED error\n " + "snippet_license_info : SNIPPET_LICENSE_INFO error\n " + "snippet_byte_range : SNIPPET_BYTE_RANGE error\n snippet_line_range : SNIPPET_LINE_RANGE error\n " "annotator : ANNOTATOR error\n annotation_date : ANNOTATION_DATE error\n " "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n " "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error") @@ -134,25 +140,30 @@ def p_current_element_error(self, p): self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did 
not match specified grammar rule. Line: {p.lineno(1)}") - @grammar_rule("license_name : LICS_NAME line_or_no_assertion\n extracted_text : LICS_TEXT text_or_line\n " - "lic_comment : LICS_COMMENT text_or_line\n license_id : LICS_ID LINE\n " + @grammar_rule("license_name : LICENSE_NAME line_or_no_assertion\n extracted_text : LICENSE_TEXT text_or_line\n " + "lic_comment : LICENSE_COMMENT text_or_line\n license_id : LICENSE_ID LINE\n " "file_name : FILE_NAME LINE \n file_notice : FILE_NOTICE text_or_line\n " - "file_cr_text : FILE_CR_TEXT line_or_no_assertion_or_none\n " - "file_lics_comment : FILE_LICS_COMMENT text_or_line\n file_comment : FILE_COMMENT text_or_line\n " - "file_conc : FILE_LICS_CONC license_or_no_assertion_or_none\n " - "package_name : PKG_NAME LINE\n description : PKG_DESC text_or_line\n summary : PKG_SUM text_or_line\n " - "source_info : PKG_SRC_INFO text_or_line\n homepage : PKG_HOME line_or_no_assertion_or_none\n " - "download_location : PKG_DOWN line_or_no_assertion_or_none\n originator : PKG_ORIG actor_or_no_assertion\n " - "supplier : PKG_SUPPL actor_or_no_assertion\n pkg_comment : PKG_COMMENT text_or_line\n " - "pkg_cr_text : PKG_CPY_TEXT line_or_no_assertion_or_none\n " - "pkg_lic_decl : PKG_LICS_DECL license_or_no_assertion_or_none\n pkg_file_name : PKG_FILE_NAME LINE\n " - "pkg_lic_conc : PKG_LICS_CONC license_or_no_assertion_or_none\n package_version : PKG_VERSION LINE\n " - "pkg_lic_comment : PKG_LICS_COMMENT text_or_line\n " - "snip_spdx_id : SNIPPET_SPDX_ID LINE\n snip_name : SNIPPET_NAME LINE\n " - "snip_comment : SNIPPET_COMMENT text_or_line\n " - "snip_cr_text : SNIPPET_CR_TEXT line_or_no_assertion_or_none\n " - "snip_lic_comment : SNIPPET_LICS_COMMENT text_or_line\n file_spdx_id : SNIPPET_FILE_SPDXID LINE\n " - "snip_lics_conc : SNIPPET_LICS_CONC license_or_no_assertion_or_none\n " + "file_copyright_text : FILE_COPYRIGHT_TEXT line_or_no_assertion_or_none\n " + "file_license_comment : FILE_LICENSE_COMMENT text_or_line\n " + "file_comment : FILE_COMMENT text_or_line\n " + "file_license_concluded : FILE_LICENSE_CONCLUDED license_or_no_assertion_or_none\n " + "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n " + "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n " + "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n " + "download_location : PKG_DOWWNLOAD_LOCATION line_or_no_assertion_or_none\n " + "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n " + "pkg_comment : PKG_COMMENT text_or_line\n " + "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n " + "pkg_license_declared : PKG_LICENSE_DECLARED license_or_no_assertion_or_none\n " + "pkg_file_name : PKG_FILE_NAME LINE\n " + "pkg_license_concluded : PKG_LICENSE_CONCLUDED license_or_no_assertion_or_none\n " + "package_version : PKG_VERSION LINE\n pkg_license_comment : PKG_LICENSE_COMMENT text_or_line\n " + "snippet_spdx_id : SNIPPET_SPDX_ID LINE\n snippet_name : SNIPPET_NAME LINE\n " + "snippet_comment : SNIPPET_COMMENT text_or_line\n " + "snippet_copyright_text : SNIPPET_COPYRIGHT_TEXT line_or_no_assertion_or_none\n " + "snippet_license_comment : SNIPPET_LICENSE_COMMENT text_or_line\n " + "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n " + "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n " "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n " "annotation_comment : ANNOTATION_COMMENT text_or_line\n " @@ -189,7 +200,7 @@ def p_none(self, p): def 
p_license(self, p): p[0] = get_spdx_licensing().parse(p[1]) - @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORG_VALUE") + @grammar_rule("actor_or_no_assertion : PERSON_VALUE\n | ORGANIZATION_VALUE") def p_actor_values(self, p): p[0] = ActorParser.parse_actor(p[1]) @@ -204,7 +215,7 @@ def p_spdx_id(self, p): # parsing methods for creation info / document level - @grammar_rule("license_list_version : LIC_LIST_VER error\n document_comment : DOC_COMMENT error\n " + @grammar_rule("license_list_version : LICENSE_LIST_VERSION error\n document_comment : DOC_COMMENT error\n " "document_namespace : DOC_NAMESPACE error\n data_license : DOC_LICENSE error\n " "doc_name : DOC_NAME error\n ext_doc_ref : EXT_DOC_REF error\n spdx_version : DOC_VERSION error\n " "creator_comment : CREATOR_COMMENT error\n creator : CREATOR error\n created : CREATED error") @@ -218,11 +229,11 @@ def p_creation_info_value_error(self, p): def p_generic_value_creation_info(self, p): set_value(p, self.creation_info) - @grammar_rule("license_list_version : LIC_LIST_VER LINE") + @grammar_rule("license_list_version : LICENSE_LIST_VERSION LINE") def p_license_list_version(self, p): set_value(p, self.creation_info, method_to_apply=Version.from_string) - @grammar_rule("ext_doc_ref : EXT_DOC_REF DOC_REF_ID DOC_URI EXT_DOC_REF_CHECKSUM") + @grammar_rule("ext_doc_ref : EXT_DOC_REF EXT_DOC_REF_ID EXT_DOC_URI EXT_DOC_REF_CHECKSUM") def p_external_document_ref(self, p): document_ref_id = p[2] document_uri = p[3] @@ -230,7 +241,7 @@ def p_external_document_ref(self, p): external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum) self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) - @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORG_VALUE") + @grammar_rule("creator : CREATOR PERSON_VALUE\n| CREATOR TOOL_VALUE\n| CREATOR ORGANIZATION_VALUE") def p_creator(self, p): self.creation_info.setdefault("creators", []).append(ActorParser.parse_actor(p[2])) @@ -240,14 +251,14 @@ def p_created(self, p): # parsing methods for extracted licensing info - @grammar_rule("lic_xref : LICS_CRS_REF LINE") + @grammar_rule("license_cross_ref : LICENSE_CROSS_REF LINE") def p_extracted_cross_reference(self, p): if self.check_that_current_element_matches_class_for_value(ExtractedLicensingInfo, p.lineno(1)): self.current_element.setdefault("cross_references", []).append(p[2]) # parsing methods for file - @grammar_rule("file_contrib : FILE_CONTRIB LINE") + @grammar_rule("file_contributor : FILE_CONTRIBUTOR LINE") def p_file_contributor(self, p): if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): self.current_element.setdefault("contributors", []).append(p[2]) @@ -257,7 +268,7 @@ def p_file_attribution_text(self, p): if self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): self.current_element.setdefault("attribution_texts", []).append(p[2]) - @grammar_rule("file_lics_info : FILE_LICS_INFO license_or_no_assertion_or_none") + @grammar_rule("file_license_info : FILE_LICENSE_INFO license_or_no_assertion_or_none") def p_file_license_info(self, p): if not self.check_that_current_element_matches_class_for_value(File, p.lineno(1)): return @@ -291,7 +302,7 @@ def p_pkg_attribution_text(self, p): self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) self.current_element.setdefault("attribution_texts", []).append(p[2]) - @grammar_rule("pkg_ext_ref : PKG_EXT_REF LINE PKG_EXT_REF_COMMENT text_or_line\n | 
PKG_EXT_REF LINE") + @grammar_rule("pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE") def p_pkg_external_refs(self, p): if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): return @@ -321,7 +332,7 @@ def p_pkg_external_refs(self, p): return self.current_element.setdefault("external_references", []).append(external_package_ref) - @grammar_rule("pkg_lic_ff : PKG_LICS_FFILE license_or_no_assertion_or_none") + @grammar_rule("pkg_license_info : PKG_LICENSE_INFO license_or_no_assertion_or_none") def p_pkg_license_info_from_file(self, p): if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): return @@ -337,7 +348,7 @@ def p_pkg_checksum(self, p): checksum = parse_checksum(p[2]) self.current_element.setdefault("checksums", []).append(checksum) - @grammar_rule("verification_code : PKG_VERF_CODE LINE") + @grammar_rule("verification_code : PKG_VERIFICATION_CODE LINE") def p_pkg_verification_code(self, p): if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): return @@ -386,7 +397,7 @@ def p_snippet_attribution_text(self, p): if self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)): self.current_element.setdefault("attribution_texts", []).append(p[2]) - @grammar_rule("snip_lics_info : SNIPPET_LICS_INFO license_or_no_assertion_or_none") + @grammar_rule("snippet_license_info : SNIPPET_LICENSE_INFO license_or_no_assertion_or_none") def p_snippet_license_info(self, p): if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)): return @@ -395,7 +406,7 @@ def p_snippet_license_info(self, p): else: self.current_element.setdefault("license_info_in_snippet", []).append(p[2]) - @grammar_rule("snip_byte_range : SNIPPET_BYTE_RANGE LINE\n snip_line_range : SNIPPET_LINE_RANGE LINE") + @grammar_rule("snippet_byte_range : SNIPPET_BYTE_RANGE LINE\n snippet_line_range : SNIPPET_LINE_RANGE LINE") def p_snippet_range(self, p): if not self.check_that_current_element_matches_class_for_value(Snippet, p.lineno(1)): return @@ -415,7 +426,7 @@ def p_snippet_range(self, p): # parsing methods for annotation - @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORG_VALUE") + @grammar_rule("annotator : ANNOTATOR PERSON_VALUE\n| TOOL_VALUE\n| ORGANIZATION_VALUE") def p_annotator(self, p): self.initialize_new_current_element(Annotation) set_value(p, self.current_element, method_to_apply=ActorParser.parse_actor) @@ -456,7 +467,7 @@ def p_relationship(self, p): if len(p) == 5: self.current_element["comment"] = p[4] - @grammar_rule("relationship_value : DOC_REF_ID LINE") + @grammar_rule("relationship_value : EXT_DOC_REF_ID LINE") def p_relationship_value_with_doc_ref(self, p): p[0] = p[1] + ":" + p[2] diff --git a/src/spdx/parser/tagvalue/parser/__init__.py b/src/spdx/parser/tagvalue/parser/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/spdx/parser/tagvalue/parser/tagvalue_parser.py b/src/spdx/parser/tagvalue/tagvalue_parser.py similarity index 93% rename from src/spdx/parser/tagvalue/parser/tagvalue_parser.py rename to src/spdx/parser/tagvalue/tagvalue_parser.py index ba4a53ead..d71c3c047 100644 --- a/src/spdx/parser/tagvalue/parser/tagvalue_parser.py +++ b/src/spdx/parser/tagvalue/tagvalue_parser.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from spdx.model.document import Document -from spdx.parser.tagvalue.parser.tagvalue import Parser +from spdx.parser.tagvalue.parser import Parser def parse_from_file(file_name: str) -> Document: diff --git a/tests/spdx/parser/tagvalue/test_annotation_parser.py b/tests/spdx/parser/tagvalue/test_annotation_parser.py index cf9ee7614..65e9fa1d1 100644 --- a/tests/spdx/parser/tagvalue/test_annotation_parser.py +++ b/tests/spdx/parser/tagvalue/test_annotation_parser.py @@ -8,54 +8,52 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import re from datetime import datetime -from unittest import TestCase import pytest from spdx.model.annotation import AnnotationType from spdx.parser.error import SPDXParsingError -from spdx.parser.tagvalue.parser.tagvalue import Parser +from spdx.parser.tagvalue.parser import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR def test_parse_annotation(): parser = Parser() - annotation_str = '\n'.join([ - 'Annotator: Person: Jane Doe()', - 'AnnotationDate: 2010-01-29T18:30:22Z', - 'AnnotationComment: Document level annotation', - 'AnnotationType: OTHER', - 'SPDXREF: SPDXRef-DOCUMENT' + annotation_str = "\n".join([ + "Annotator: Person: Jane Doe()", + "AnnotationDate: 2010-01-29T18:30:22Z", + "AnnotationComment: Document level annotation", + "AnnotationType: OTHER", + "SPDXREF: SPDXRef-DOCUMENT" ]) document = parser.parse("\n".join([DOCUMENT_STR, annotation_str])) assert document is not None assert len(document.annotations) == 1 annotation = document.annotations[0] - assert annotation.annotator.name == 'Jane Doe' + assert annotation.annotator.name == "Jane Doe" assert annotation.annotation_date == datetime(2010, 1, 29, 18, 30, 22) - assert annotation.annotation_comment == 'Document level annotation' + assert annotation.annotation_comment == "Document level annotation" assert annotation.annotation_type == AnnotationType.OTHER - assert annotation.spdx_id == 'SPDXRef-DOCUMENT' + assert annotation.spdx_id == "SPDXRef-DOCUMENT" @pytest.mark.parametrize("annotation_str, expected_message", [ - ('Annotator: Person: Jane Doe()', r"__init__() missing 4 " + ("Annotator: Person: Jane Doe()", r"__init__() missing 4 " "required positional arguments: 'spdx_id', 'annotation_type', " "'annotation_date', and 'annotation_comment'"), - ('Annotator: Person: Jane Doe()\nAnnotationType: SOURCE\nAnnotationDate: 201001-2912:23', + ("Annotator: Person: Jane Doe()\nAnnotationType: SOURCE\nAnnotationDate: 201001-2912:23", "Error while parsing Annotation: ['Invalid AnnotationType: SOURCE. Line: 2', " "'Error while parsing AnnotationDate: Token did not match specified grammar " "rule. Line: 3']"), - ('Annotator: Jane Doe()\nAnnotationDate: 201001-29T18:30:22Z\n' - 'AnnotationComment: Document level annotation\nAnnotationType: OTHER\nSPDXREF: SPDXRef-DOCUMENT', + ("Annotator: Jane Doe()\nAnnotationDate: 201001-29T18:30:22Z\n" + "AnnotationComment: Document level annotation\nAnnotationType: OTHER\nSPDXREF: SPDXRef-DOCUMENT", "Error while parsing Annotation: ['Error while parsing Annotator: Token did " "not match specified grammar rule. Line: 1', 'Error while parsing " "AnnotationDate: Token did not match specified grammar rule. 
Line: 2']"), - ('Annotator: Person: ()', "Error while parsing Annotation: [['No name for Person provided: Person: ().']]"), - ('AnnotationType: REVIEW', 'Element Annotation is not the current element in scope, probably the ' - 'expected tag to start the element (Annotator) is missing. Line: 1')]) + ("Annotator: Person: ()", "Error while parsing Annotation: [['No name for Person provided: Person: ().']]"), + ("AnnotationType: REVIEW", "Element Annotation is not the current element in scope, probably the " + "expected tag to start the element (Annotator) is missing. Line: 1")]) def test_parse_invalid_annotation(annotation_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: diff --git a/tests/spdx/parser/tagvalue/test_creation_info_parser.py b/tests/spdx/parser/tagvalue/test_creation_info_parser.py index f98f997dd..2d789229b 100644 --- a/tests/spdx/parser/tagvalue/test_creation_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_creation_info_parser.py @@ -18,21 +18,21 @@ from spdx.model.external_document_ref import ExternalDocumentRef from spdx.model.version import Version from spdx.parser.error import SPDXParsingError -from spdx.parser.tagvalue.parser.tagvalue import Parser +from spdx.parser.tagvalue.parser import Parser -DOCUMENT_STR = '\n'.join([ - 'SPDXVersion: SPDX-2.3', - 'DataLicense: CC0-1.0', - 'DocumentName: Sample_Document-V2.3', - 'SPDXID: SPDXRef-DOCUMENT', - 'DocumentComment: Sample Comment', - 'DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301', - 'ExternalDocumentRef: DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759', - 'Creator: Person: Bob (bob@example.com)', - 'Creator: Organization: Acme.', - 'Created: 2010-02-03T00:00:00Z', - 'CreatorComment: Sample Comment \nwith multiple \nlines.', - 'LicenseListVersion: 3.17' +DOCUMENT_STR = "\n".join([ + "SPDXVersion: SPDX-2.3", + "DataLicense: CC0-1.0", + "DocumentName: Sample_Document-V2.3", + "SPDXID: SPDXRef-DOCUMENT", + "DocumentComment: Sample Comment", + "DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301", + "ExternalDocumentRef: DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + "Creator: Person: Bob (bob@example.com)", + "Creator: Organization: Acme.", + "Created: 2010-02-03T00:00:00Z", + "CreatorComment: Sample Comment \nwith multiple \nlines.", + "LicenseListVersion: 3.17" ]) @@ -43,15 +43,15 @@ def test_parse_creation_info(): creation_info = document.creation_info assert creation_info is not None assert creation_info.spdx_version == "SPDX-2.3" - assert creation_info.data_license == 'CC0-1.0' - assert creation_info.name == 'Sample_Document-V2.3' - assert creation_info.spdx_id == 'SPDXRef-DOCUMENT' - assert creation_info.document_comment == 'Sample Comment' - assert creation_info.document_namespace == 'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301' + assert creation_info.data_license == "CC0-1.0" + assert creation_info.name == "Sample_Document-V2.3" + assert creation_info.spdx_id == "SPDXRef-DOCUMENT" + assert creation_info.document_comment == "Sample Comment" + assert creation_info.document_namespace == "https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301" TestCase().assertCountEqual(creation_info.creators, [Actor(ActorType.PERSON, "Bob", 
"bob@example.com"), Actor(ActorType.ORGANIZATION, "Acme.")]) - assert creation_info.creator_comment == 'Sample Comment \nwith multiple \nlines.' + assert creation_info.creator_comment == "Sample Comment \nwith multiple \nlines." assert creation_info.created == datetime(2010, 2, 3) assert creation_info.license_list_version == Version(3, 17) assert creation_info.external_document_refs == [ExternalDocumentRef("DocumentRef-spdx-tool-1.2", @@ -61,28 +61,28 @@ def test_parse_creation_info(): @pytest.mark.parametrize("document_str, expected_message", - ([('\n'.join( - ['SPDXVersion: SPDX-2.3', 'DataLicense: CC0-1.0', 'DocumentName: Sample_Document-V2.3', - 'SPDXID: SPDXRef-DOCUMENT', 'DocumentComment: Sample Comment', - 'DocumentNamespace: Sample Comment', - 'ExternalDocumentRef: DocumentRef-spdx-tool-1.2:htp://spdx.org:SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759', - 'Creator: Person Bob (bob@example.com)', 'Creator: Organization: Acme [email]', - 'Created: 2010-02-03T00:00:0Z', 'CreatorComment: Sample Comment', - 'LicenseListVersion: 7']), - "Error while parsing CreationInfo: " - "['Error while parsing DocumentNamespace: Token did not match specified grammar rule. " - "Line: 6', 'Error while parsing ExternalDocumentRef: " - "Token did not match specified grammar rule. Line: 7', 'Error while parsing Creator: " - "Token did not match specified grammar rule. Line: 8', 'Error while parsing Created: " - "Token did not match specified grammar rule. Line: 10', '7 is not a valid version string']"), - ('\n'.join( - ['SPDXVersion: SPDX-2.3', 'DataLicense: CC0-1.0', 'DocumentName: Sample_Document-V2.3', - 'SPDXID: SPDXRef-DOCUMENT']), - r"__init__() missing 3 required positional arguments: " - r"'document_namespace', 'creators', and 'created'"), - ('LicenseListVersion: 3.5\nLicenseListVersion: 3.7', - "Error while parsing CreationInfo: ['Multiple values for LicenseListVersion " - "found. Line: 2']")])) + ([("\n".join( + ["SPDXVersion: SPDX-2.3", "DataLicense: CC0-1.0", "DocumentName: Sample_Document-V2.3", + "SPDXID: SPDXRef-DOCUMENT", "DocumentComment: Sample Comment", + "DocumentNamespace: Sample Comment", + "ExternalDocumentRef: DocumentRef-spdx-tool-1.2:htp://spdx.org:SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + "Creator: Person Bob (bob@example.com)", "Creator: Organization: Acme [email]", + "Created: 2010-02-03T00:00:0Z", "CreatorComment: Sample Comment", + "LicenseListVersion: 7"]), + "Error while parsing CreationInfo: ['Error while parsing DocumentNamespace: " + "Token did not match specified grammar rule. Line: 6', 'Error while parsing " + "ExternalDocumentRef: Token did not match specified grammar rule. Line: 7', " + "'Error while parsing Creator: Token did not match specified grammar rule. Line: 8', " + "'Error while parsing Created: Token did not match specified grammar rule. Line: 10', " + "'7 is not a valid version string']"), + ("\n".join( + ["SPDXVersion: SPDX-2.3", "DataLicense: CC0-1.0", "DocumentName: Sample_Document-V2.3", + "SPDXID: SPDXRef-DOCUMENT"]), + r"__init__() missing 3 required positional arguments: 'document_namespace', " + r"'creators', and 'created'"), + ("LicenseListVersion: 3.5\nLicenseListVersion: 3.7", + "Error while parsing CreationInfo: ['Multiple values for LicenseListVersion found. 
" + "Line: 2']")])) def test_parse_invalid_creation_info(document_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: diff --git a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py index a8c1c2f66..f8e27ace1 100644 --- a/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_extracted_licensing_info_parser.py @@ -13,30 +13,30 @@ import pytest from spdx.parser.error import SPDXParsingError -from spdx.parser.tagvalue.parser.tagvalue import Parser +from spdx.parser.tagvalue.parser import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR def test_parse_extracted_licensing_info(): parser = Parser() - extracted_licensing_info_str = '\n'.join([ - 'LicenseID: LicenseRef-Beerware-4.2', - 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you ' - 'retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this ' - 'stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' - 'LicenseName: Beer-Ware License (Version 42)', - 'LicenseCrossReference: http://people.freebsd.org/~phk/', - 'LicenseCrossReference: http://another.cross.reference/', - 'LicenseComment: The beerware license has a couple of other standard variants.' + extracted_licensing_info_str = "\n".join([ + "LicenseID: LicenseRef-Beerware-4.2", + "ExtractedText: \"THE BEER-WARE LICENSE\" (Revision 42): phk@FreeBSD.ORG wrote this file. As long as you " + "retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this " + "stuff is worth it, you can buy me a beer in return Poul-Henning Kamp" + "LicenseName: Beer-Ware License (Version 42)", + "LicenseCrossReference: http://people.freebsd.org/~phk/", + "LicenseCrossReference: http://another.cross.reference/", + "LicenseComment: The beerware license has a couple of other standard variants." ]) document = parser.parse("\n".join([DOCUMENT_STR, extracted_licensing_info_str])) assert document is not None assert len(document.extracted_licensing_info) == 1 extracted_licensing_info = document.extracted_licensing_info[0] assert extracted_licensing_info.license_id == "LicenseRef-Beerware-4.2" - assert extracted_licensing_info.extracted_text == '"THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. ' \ - 'As long as you retain this notice you can do whatever you want with this stuff. ' \ - 'If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp' + assert extracted_licensing_info.extracted_text == "\"THE BEER-WARE LICENSE\" (Revision 42): phk@FreeBSD.ORG wrote this file. " \ + "As long as you retain this notice you can do whatever you want with this stuff. " \ + "If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp" assert extracted_licensing_info.license_name == "Beer-Ware License (Version 42)" TestCase().assertCountEqual(extracted_licensing_info.cross_references, ["http://people.freebsd.org/~phk/", "http://another.cross.reference/"]) @@ -45,20 +45,22 @@ def test_parse_extracted_licensing_info(): def test_parse_invalid_extracted_licensing_info(): parser = Parser() - extracted_licensing_info_str = '\n'.join([ - 'ExtractedText: "THE BEER-WARE LICENSE" (Revision 42): phk@FreeBSD.ORG wrote this file. 
As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp', - 'LicenseName: Beer-Ware License (Version 42)', - 'LicenseCrossReference: http://people.freebsd.org/~phk/', - 'LicenseComment: The beerware license has a couple of other standard variants.']) + extracted_licensing_info_str = "\n".join([ + "ExtractedText: \"THE BEER-WARE LICENSE\" (Revision 42): phk@FreeBSD.ORG wrote this file. " + "As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you " + "think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp", + "LicenseName: Beer-Ware License (Version 42)", + "LicenseCrossReference: http://people.freebsd.org/~phk/", + "LicenseComment: The beerware license has a couple of other standard variants."]) with pytest.raises(SPDXParsingError) as err: parser.parse(extracted_licensing_info_str) - assert err.value.get_messages() == ['Element ExtractedLicensingInfo is not the current element in scope, probably ' - 'the expected tag to start the element (LicenseID) is missing. Line: 1', - 'Element ExtractedLicensingInfo is not the current element in scope, probably ' - 'the expected tag to start the element (LicenseID) is missing. Line: 2', - 'Element ExtractedLicensingInfo is not the current element in scope, probably ' - 'the expected tag to start the element (LicenseID) is missing. Line: 3', - 'Element ExtractedLicensingInfo is not the current element in scope, probably ' - 'the expected tag to start the element (LicenseID) is missing. Line: 4'] + assert err.value.get_messages() == ["Element ExtractedLicensingInfo is not the current element in scope, probably " + "the expected tag to start the element (LicenseID) is missing. Line: 1", + "Element ExtractedLicensingInfo is not the current element in scope, probably " + "the expected tag to start the element (LicenseID) is missing. Line: 2", + "Element ExtractedLicensingInfo is not the current element in scope, probably " + "the expected tag to start the element (LicenseID) is missing. Line: 3", + "Element ExtractedLicensingInfo is not the current element in scope, probably " + "the expected tag to start the element (LicenseID) is missing. Line: 4"] diff --git a/tests/spdx/parser/tagvalue/test_file_parser.py b/tests/spdx/parser/tagvalue/test_file_parser.py index c6190850a..fecc8d77b 100644 --- a/tests/spdx/parser/tagvalue/test_file_parser.py +++ b/tests/spdx/parser/tagvalue/test_file_parser.py @@ -13,51 +13,51 @@ from spdx.model.file import FileType from spdx.parser.error import SPDXParsingError -from spdx.parser.tagvalue.parser.tagvalue import Parser +from spdx.parser.tagvalue.parser import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR def test_parse_file(): parser = Parser() - file_str = '\n'.join([ - 'FileName: testfile.java', - 'SPDXID: SPDXRef-File', - 'FileType: SOURCE', - 'FileType: TEXT', - 'FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', - 'LicenseConcluded: Apache-2.0', - 'LicenseInfoInFile: Apache-2.0', - 'FileCopyrightText: Copyright 2014 Acme Inc.', - 'FileComment: Very long file', - 'FileAttributionText: Acknowledgements that might be required to be communicated in some contexts.' 
+ file_str = "\n".join([ + "FileName: testfile.java", + "SPDXID: SPDXRef-File", + "FileType: SOURCE", + "FileType: TEXT", + "FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", + "LicenseConcluded: Apache-2.0", + "LicenseInfoInFile: Apache-2.0", + "FileCopyrightText: Copyright 2014 Acme Inc.", + "FileComment: Very long file", + "FileAttributionText: Acknowledgements that might be required to be communicated in some contexts." ]) document = parser.parse("\n".join([DOCUMENT_STR, file_str])) assert document is not None assert len(document.files) == 1 spdx_file = document.files[0] - assert spdx_file.name == 'testfile.java' - assert spdx_file.spdx_id == 'SPDXRef-File' + assert spdx_file.name == "testfile.java" + assert spdx_file.spdx_id == "SPDXRef-File" assert spdx_file.file_type == [FileType.SOURCE, FileType.TEXT] - assert spdx_file.comment == 'Very long file' + assert spdx_file.comment == "Very long file" assert spdx_file.attribution_texts == [ - 'Acknowledgements that might be required to be communicated in some contexts.'] + "Acknowledgements that might be required to be communicated in some contexts."] assert spdx_file.license_info_in_file == [get_spdx_licensing().parse("Apache-2.0")] assert spdx_file.license_concluded == get_spdx_licensing().parse("Apache-2.0") def test_parse_invalid_file(): parser = Parser() - file_str = '\n'.join([ - 'FileName: testfile.java', - 'SPDXID: SPDXRef-File', - 'FileType: SOUCE', - 'FileType: TEXT', - 'FileChecksum: SHA3: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', - 'LicenseConcluded: Apache-2.0', - 'LicenseInfoInFile: Apache-2.0', - 'FileCopyrightText: Copyright 2014 Acme Inc.', - 'FileComment: Very long file', - 'FileAttributionText: Acknowledgements that might be required to be communicated in some contexts.' + file_str = "\n".join([ + "FileName: testfile.java", + "SPDXID: SPDXRef-File", + "FileType: SOUCE", + "FileType: TEXT", + "FileChecksum: SHA3: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", + "LicenseConcluded: Apache-2.0", + "LicenseInfoInFile: Apache-2.0", + "FileCopyrightText: Copyright 2014 Acme Inc.", + "FileComment: Very long file", + "FileAttributionText: Acknowledgements that might be required to be communicated in some contexts." 
]) with pytest.raises(SPDXParsingError) as err: diff --git a/tests/spdx/parser/tagvalue/test_helper_methods.py b/tests/spdx/parser/tagvalue/test_helper_methods.py index d38952502..75e7f0742 100644 --- a/tests/spdx/parser/tagvalue/test_helper_methods.py +++ b/tests/spdx/parser/tagvalue/test_helper_methods.py @@ -11,7 +11,7 @@ import pytest from spdx.model.checksum import ChecksumAlgorithm -from spdx.parser.tagvalue.parser.helper_methods import parse_checksum +from spdx.parser.tagvalue.helper_methods import parse_checksum @pytest.mark.parametrize("checksum_str, algorithm, value", diff --git a/tests/spdx/parser/tagvalue/test_package_parser.py b/tests/spdx/parser/tagvalue/test_package_parser.py index 02e9dea2a..84b61f0d0 100644 --- a/tests/spdx/parser/tagvalue/test_package_parser.py +++ b/tests/spdx/parser/tagvalue/test_package_parser.py @@ -16,53 +16,53 @@ from spdx.model.package import ExternalPackageRef, ExternalPackageRefCategory, PackagePurpose from spdx.parser.error import SPDXParsingError -from spdx.parser.tagvalue.parser.tagvalue import Parser +from spdx.parser.tagvalue.parser import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR def test_parse_package(): parser = Parser() - package_str = '\n'.join([ - 'PackageName: Test', - 'SPDXID: SPDXRef-Package', - 'PackageVersion: 1:22.36.1-8+deb11u1', - 'PackageDownloadLocation: http://example.com/test', - 'FilesAnalyzed: True', - 'PackageSummary: Test package', - 'PackageSourceInfo: Version 1.0 of test', - 'PackageFileName: test-1.0.zip', - 'PackageSupplier: Organization:ACME', - 'PackageOriginator: Organization:ACME', - 'PackageChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', - 'PackageVerificationCode: 4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)', - 'PackageDescription: A package.', - 'PackageComment: Comment on the package.', - 'PackageCopyrightText: Copyright 2014 Acme Inc.', - 'PackageLicenseDeclared: Apache-2.0', - 'PackageLicenseConcluded: (LicenseRef-2.0 and Apache-2.0)', - 'PackageLicenseInfoFromFiles: Apache-1.0', - 'PackageLicenseInfoFromFiles: Apache-2.0', - 'PackageLicenseComments: License Comments', - 'ExternalRef: SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:', - 'ExternalRefComment: Some comment about the package.', - 'ExternalRef: OTHER LocationRef-acmeforge acmecorp/acmenator/4.1.3-alpha', - 'PrimaryPackagePurpose: OPERATING-SYSTEM', - 'BuiltDate: 2020-01-01T12:00:00Z', - 'ReleaseDate: 2021-01-01T12:00:00Z', - 'ValidUntilDate: 2022-01-01T12:00:00Z' + package_str = "\n".join([ + "PackageName: Test", + "SPDXID: SPDXRef-Package", + "PackageVersion: 1:22.36.1-8+deb11u1", + "PackageDownloadLocation: http://example.com/test", + "FilesAnalyzed: True", + "PackageSummary: Test package", + "PackageSourceInfo: Version 1.0 of test", + "PackageFileName: test-1.0.zip", + "PackageSupplier: Organization:ACME", + "PackageOriginator: Organization:ACME", + "PackageChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", + "PackageVerificationCode: 4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)", + "PackageDescription: A package.", + "PackageComment: Comment on the package.", + "PackageCopyrightText: Copyright 2014 Acme Inc.", + "PackageLicenseDeclared: Apache-2.0", + "PackageLicenseConcluded: (LicenseRef-2.0 and Apache-2.0)", + "PackageLicenseInfoFromFiles: Apache-1.0", + "PackageLicenseInfoFromFiles: Apache-2.0", + "PackageLicenseComments: License Comments", + "ExternalRef: SECURITY cpe23Type 
cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:", + "ExternalRefComment: Some comment about the package.", + "ExternalRef: OTHER LocationRef-acmeforge acmecorp/acmenator/4.1.3-alpha", + "PrimaryPackagePurpose: OPERATING-SYSTEM", + "BuiltDate: 2020-01-01T12:00:00Z", + "ReleaseDate: 2021-01-01T12:00:00Z", + "ValidUntilDate: 2022-01-01T12:00:00Z" ]) document = parser.parse("\n".join([DOCUMENT_STR, package_str])) assert document is not None package = document.packages[0] - assert package.name == 'Test' - assert package.spdx_id == 'SPDXRef-Package' - assert package.version == '1:22.36.1-8+deb11u1' + assert package.name == "Test" + assert package.spdx_id == "SPDXRef-Package" + assert package.version == "1:22.36.1-8+deb11u1" assert len(package.license_info_from_files) == 2 TestCase().assertCountEqual(package.license_info_from_files, [get_spdx_licensing().parse("Apache-1.0"), get_spdx_licensing().parse("Apache-2.0")]) - assert package.license_concluded == get_spdx_licensing().parse('LicenseRef-2.0 AND Apache-2.0') + assert package.license_concluded == get_spdx_licensing().parse("LicenseRef-2.0 AND Apache-2.0") assert package.files_analyzed is True - assert package.comment == 'Comment on the package.' + assert package.comment == "Comment on the package." assert len(package.external_references) == 2 TestCase().assertCountEqual(package.external_references, [ExternalPackageRef(ExternalPackageRefCategory.SECURITY, "cpe23Type", @@ -77,26 +77,26 @@ def test_parse_package(): @pytest.mark.parametrize("package_str, expected_message", - [('PackageDownloadLocation: SPDXRef-Package', - 'Element Package is not the current element in scope, probably the expected ' - 'tag to start the element (PackageName) is missing. Line: 1'), - ('PackageName: TestPackage', + [("PackageDownloadLocation: SPDXRef-Package", + "Element Package is not the current element in scope, probably the expected " + "tag to start the element (PackageName) is missing. Line: 1"), + ("PackageName: TestPackage", r"__init__() missing 2 required positional arguments: 'spdx_id' and 'download_location'"), - ('PackageName: TestPackage\nPackageCopyrightText:This is a copyright\n' - 'PackageCopyrightText:MultipleCopyright', + ("PackageName: TestPackage\nPackageCopyrightText:This is a copyright\n" + "PackageCopyrightText:MultipleCopyright", "Error while parsing Package: ['Multiple values for PackageCopyrightText " "found. Line: 3']"), - ('PackageName: TestPackage\nExternalRef: reference locator', - 'Error while parsing Package: ["Couldn\'t split PackageExternalRef in ' - 'category, reference_type and locator. Line: 2"]'), - ('PackageName: TestPackage\nExternalRef: category reference locator', + ("PackageName: TestPackage\nExternalRef: reference locator", + ('Error while parsing Package: ["Couldn\'t split PackageExternalRef in category, ' + 'reference_type and locator. Line: 2"]')), + ("PackageName: TestPackage\nExternalRef: category reference locator", "Error while parsing Package: ['Invalid ExternalPackageRefCategory: " "category. Line: 2']"), - ('SPDXID:SPDXRef-DOCUMENT\nPackageName: TestPackage\nSPDXID:SPDXRef-Package\n' - 'PackageDownloadLocation: download.com\nPackageVerificationCode: category reference locator', + ("SPDXID:SPDXRef-DOCUMENT\nPackageName: TestPackage\nSPDXID:SPDXRef-Package\n" + "PackageDownloadLocation: download.com\nPackageVerificationCode: category reference locator", "Error while parsing Package: ['Error while parsing PackageVerificationCode: " "Value did not match expected format. 
Line: 5']"), - ('PackageName: TestPackage\nBuiltDate: 2012\nValidUntilDate:202-11-02T00:00', + ("PackageName: TestPackage\nBuiltDate: 2012\nValidUntilDate:202-11-02T00:00", "Error while parsing Package: ['Error while parsing BuiltDate: Token did not " "match specified grammar rule. Line: 2', 'Error while parsing " "ValidUntilDate: Token did not match specified grammar rule. Line: 3']") diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py index adc9a2ecb..18a6ee3b8 100644 --- a/tests/spdx/parser/tagvalue/test_relationship_parser.py +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -14,21 +14,21 @@ from spdx.model.spdx_no_assertion import SpdxNoAssertion from spdx.model.spdx_none import SpdxNone from spdx.parser.error import SPDXParsingError -from spdx.parser.tagvalue.parser.tagvalue import Parser +from spdx.parser.tagvalue.parser import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR @pytest.mark.parametrize("relationship_str, expected_relationship", - [('\n'.join(['Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-File', - 'RelationshipComment: This is a comment.']), + [("\n".join(["Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-File", + "RelationshipComment: This is a comment."]), Relationship("SPDXRef-DOCUMENT", RelationshipType.DESCRIBES, "SPDXRef-File", "This is a comment.")), - ('Relationship: SPDXRef-DOCUMENT PATCH_FOR NOASSERTION', + ("Relationship: SPDXRef-DOCUMENT PATCH_FOR NOASSERTION", Relationship("SPDXRef-DOCUMENT", RelationshipType.PATCH_FOR, SpdxNoAssertion())), - ('Relationship: SPDXRef-CarolCompression DEPENDS_ON NONE', + ("Relationship: SPDXRef-CarolCompression DEPENDS_ON NONE", Relationship("SPDXRef-CarolCompression", RelationshipType.DEPENDS_ON, SpdxNone())), - ('Relationship: DocumentRef-ExternalDocument: SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef', + ("Relationship: DocumentRef-ExternalDocument: SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef", Relationship("DocumentRef-ExternalDocument:SPDXRef-Test", RelationshipType.DEPENDS_ON, "DocumentRef:AnotherRef")) ]) @@ -43,8 +43,8 @@ def test_parse_relationship(relationship_str, expected_relationship): @pytest.mark.parametrize("relationship_str, expected_message", [("Relationship: spdx_id DESCRIBES", - ['Error while parsing Relationship: ["Relationship couldn\'t be split in spdx_element_id, ' - 'relationship_type and related_spdx_element. Line: 1"]']), + ['Error while parsing Relationship: ["Relationship couldn\'t be split in ' + 'spdx_element_id, relationship_type and related_spdx_element. Line: 1"]']), ("Relationship: spdx_id IS spdx_id", ["Error while parsing Relationship: ['Invalid RelationshipType IS. 
Line: 1']"])]) def test_parse_invalid_relationship(relationship_str, expected_message): diff --git a/tests/spdx/parser/tagvalue/test_snippet_parser.py b/tests/spdx/parser/tagvalue/test_snippet_parser.py index 7e76bd815..5ce2b0f74 100644 --- a/tests/spdx/parser/tagvalue/test_snippet_parser.py +++ b/tests/spdx/parser/tagvalue/test_snippet_parser.py @@ -14,39 +14,39 @@ from license_expression import get_spdx_licensing from spdx.parser.error import SPDXParsingError -from spdx.parser.tagvalue.parser.tagvalue import Parser +from spdx.parser.tagvalue.parser import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR def test_parse_snippet(): parser = Parser() - snippet_str = '\n'.join([ - 'SnippetSPDXID: SPDXRef-Snippet', - 'SnippetLicenseComments: Some lic comment.', - 'SnippetCopyrightText: Copyright 2008-2010 John Smith ', - 'SnippetComment: Some snippet comment.', - 'SnippetName: from linux kernel', - 'SnippetFromFileSPDXID: SPDXRef-DoapSource', - 'SnippetLicenseConcluded: Apache-2.0', - 'LicenseInfoInSnippet: Apache-2.0', - 'SnippetByteRange: 310:420', - 'SnippetLineRange: 5:23', - 'SnippetAttributionText: This is a text\nthat spans multiple lines.', - 'SnippetAttributionText: This text spans one line but has trailing and leading whitespaces. ' + snippet_str = "\n".join([ + "SnippetSPDXID: SPDXRef-Snippet", + "SnippetLicenseComments: Some lic comment.", + "SnippetCopyrightText: Copyright 2008-2010 John Smith ", + "SnippetComment: Some snippet comment.", + "SnippetName: from linux kernel", + "SnippetFromFileSPDXID: SPDXRef-DoapSource", + "SnippetLicenseConcluded: Apache-2.0", + "LicenseInfoInSnippet: Apache-2.0", + "SnippetByteRange: 310:420", + "SnippetLineRange: 5:23", + "SnippetAttributionText: This is a text\nthat spans multiple lines.", + "SnippetAttributionText: This text spans one line but has trailing and leading whitespaces. " ]) document = parser.parse("\n".join([DOCUMENT_STR, snippet_str])) assert document is not None assert len(document.snippets) == 1 snippet = document.snippets[0] - assert snippet.spdx_id == 'SPDXRef-Snippet' - assert snippet.name == 'from linux kernel' - assert snippet.comment == 'Some snippet comment.' - assert snippet.copyright_text == ' Copyright 2008-2010 John Smith ' - assert snippet.license_comment == 'Some lic comment.' - assert snippet.file_spdx_id == 'SPDXRef-DoapSource' - assert snippet.license_concluded == get_spdx_licensing().parse('Apache-2.0') - assert snippet.license_info_in_snippet == [get_spdx_licensing().parse('Apache-2.0')] + assert snippet.spdx_id == "SPDXRef-Snippet" + assert snippet.name == "from linux kernel" + assert snippet.comment == "Some snippet comment." + assert snippet.copyright_text == " Copyright 2008-2010 John Smith " + assert snippet.license_comment == "Some lic comment." + assert snippet.file_spdx_id == "SPDXRef-DoapSource" + assert snippet.license_concluded == get_spdx_licensing().parse("Apache-2.0") + assert snippet.license_info_in_snippet == [get_spdx_licensing().parse("Apache-2.0")] assert snippet.byte_range[0] == 310 assert snippet.byte_range[1] == 420 assert snippet.line_range[0] == 5 @@ -57,15 +57,15 @@ def test_parse_snippet(): @pytest.mark.parametrize("snippet_str, expected_message", [ - ('SnippetName: TestSnippet', 'Element Snippet is not the current element in scope, probably the expected ' - 'tag to start the element (SnippetSPDXID) is missing. 
Line: 1'), - ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1,4', + ("SnippetName: TestSnippet", "Element Snippet is not the current element in scope, probably the expected " + "tag to start the element (SnippetSPDXID) is missing. Line: 1"), + ("SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1,4", 'Error while parsing Snippet: ["Value for SnippetByteRange doesn\'t match ' 'valid range pattern. Line: 2"]'), - ('SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1:4\nSnippetByteRange:10:23', + ("SnippetSPDXID: SPDXDRef-Snippet\nSnippetByteRange: 1:4\nSnippetByteRange:10:23", "Error while parsing Snippet: ['Multiple values for SnippetByteRange found. " "Line: 3']"), - ('SnippetSPDXID: SPDXRef-Snippet', r"__init__() missing 2 required " + ("SnippetSPDXID: SPDXRef-Snippet", r"__init__() missing 2 required " r"positional arguments: 'file_spdx_id' and 'byte_range'") ]) def test_parse_invalid_snippet(snippet_str, expected_message): diff --git a/tests/spdx/parser/tagvalue/test_tag_value_lexer.py b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py index bd82fab3b..4d4b2cdb9 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_lexer.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py @@ -9,11 +9,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from unittest import TestCase - import pytest -from spdx.parser.tagvalue.lexer.tagvalue import SPDXLexer +from spdx.parser.tagvalue.lexer import SPDXLexer @pytest.fixture @@ -30,259 +28,259 @@ def token_assert_helper(token, token_type, value, line_number): def test_tokenization_of_document(lexer): - document_str = '\n'.join([ - 'SPDXVersion: SPDX-2.1', - 'DataLicense: CC0-1.0', - 'DocumentName: Sample_Document-V2.1', - 'SPDXID: SPDXRef-DOCUMENT', - 'DocumentComment: Sample Comment', - 'DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301' + document_str = "\n".join([ + "SPDXVersion: SPDX-2.1", + "DataLicense: CC0-1.0", + "DocumentName: Sample_Document-V2.1", + "SPDXID: SPDXRef-DOCUMENT", + "DocumentComment: Sample Comment", + "DocumentNamespace: https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301" ]) lexer.input(document_str) - token_assert_helper(lexer.token(), 'DOC_VERSION', 'SPDXVersion', 1) - token_assert_helper(lexer.token(), 'LINE', 'SPDX-2.1', 1) - token_assert_helper(lexer.token(), 'DOC_LICENSE', 'DataLicense', 2) - token_assert_helper(lexer.token(), 'LINE', 'CC0-1.0', 2) - token_assert_helper(lexer.token(), 'DOC_NAME', 'DocumentName', 3) - token_assert_helper(lexer.token(), 'LINE', 'Sample_Document-V2.1', 3) - token_assert_helper(lexer.token(), 'SPDX_ID', 'SPDXID', 4) - token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-DOCUMENT', 4) - token_assert_helper(lexer.token(), 'DOC_COMMENT', 'DocumentComment', 5) - token_assert_helper(lexer.token(), 'TEXT', 'Sample Comment', 5) - token_assert_helper(lexer.token(), 'DOC_NAMESPACE', 'DocumentNamespace', 6) - token_assert_helper(lexer.token(), 'LINE', - 'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301', 6) + token_assert_helper(lexer.token(), "DOC_VERSION", "SPDXVersion", 1) + token_assert_helper(lexer.token(), "LINE", "SPDX-2.1", 1) + token_assert_helper(lexer.token(), "DOC_LICENSE", "DataLicense", 2) + token_assert_helper(lexer.token(), "LINE", "CC0-1.0", 2) + token_assert_helper(lexer.token(), "DOC_NAME", "DocumentName", 3) + token_assert_helper(lexer.token(), 
"LINE", "Sample_Document-V2.1", 3) + token_assert_helper(lexer.token(), "SPDX_ID", "SPDXID", 4) + token_assert_helper(lexer.token(), "LINE", "SPDXRef-DOCUMENT", 4) + token_assert_helper(lexer.token(), "DOC_COMMENT", "DocumentComment", 5) + token_assert_helper(lexer.token(), "TEXT", "Sample Comment", 5) + token_assert_helper(lexer.token(), "DOC_NAMESPACE", "DocumentNamespace", 6) + token_assert_helper(lexer.token(), "LINE", + "https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301", 6) def test_tokenization_of_external_document_references(lexer): - data = ''' + data = """ ExternalDocumentRef:DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759 - ''' + """ lexer.input(data) - token_assert_helper(lexer.token(), 'EXT_DOC_REF', 'ExternalDocumentRef', 2) - token_assert_helper(lexer.token(), 'DOC_REF_ID', 'DocumentRef-spdx-tool-2.1', 2) - token_assert_helper(lexer.token(), 'DOC_URI', 'http://spdx.org/spdxdocs/spdx-tools-v2.1-3F25' - '04E0-4F89-41D3-9A0C-0305E82C3301', 2) - token_assert_helper(lexer.token(), 'EXT_DOC_REF_CHECKSUM', 'SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759', 2) + token_assert_helper(lexer.token(), "EXT_DOC_REF", "ExternalDocumentRef", 2) + token_assert_helper(lexer.token(), "EXT_DOC_REF_ID", "DocumentRef-spdx-tool-2.1", 2) + token_assert_helper(lexer.token(), "EXT_DOC_URI", "http://spdx.org/spdxdocs/spdx-tools-v2.1-3F25" + "04E0-4F89-41D3-9A0C-0305E82C3301", 2) + token_assert_helper(lexer.token(), "EXT_DOC_REF_CHECKSUM", "SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", 2) def test_tokenization_of_file(lexer): - file_str = '\n'.join([ - 'FileName: testfile.java', - 'SPDXID: SPDXRef-File', - 'FileType: SOURCE', - 'FileType: TEXT', - 'FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', - 'LicenseConcluded: Apache-2.0', - 'LicenseInfoInFile: Apache-2.0', - 'FileCopyrightText: Copyright 2014 Acme Inc.', - 'FileComment: Very long file', - 'FileAttributionText: Acknowledgements that might be required to be communicated in some contexts.' + file_str = "\n".join([ + "FileName: testfile.java", + "SPDXID: SPDXRef-File", + "FileType: SOURCE", + "FileType: TEXT", + "FileChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", + "LicenseConcluded: Apache-2.0", + "LicenseInfoInFile: Apache-2.0", + "FileCopyrightText: Copyright 2014 Acme Inc.", + "FileComment: Very long file", + "FileAttributionText: Acknowledgements that might be required to be communicated in some contexts." 
]) lexer.input(file_str) - token_assert_helper(lexer.token(), 'FILE_NAME', 'FileName', 1) - token_assert_helper(lexer.token(), 'LINE', 'testfile.java', 1) - token_assert_helper(lexer.token(), 'SPDX_ID', 'SPDXID', 2) - token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-File', 2) - token_assert_helper(lexer.token(), 'FILE_TYPE', 'FileType', 3) - token_assert_helper(lexer.token(), 'LINE', 'SOURCE', 3) - token_assert_helper(lexer.token(), 'FILE_TYPE', 'FileType', 4) - token_assert_helper(lexer.token(), 'LINE', 'TEXT', 4) - token_assert_helper(lexer.token(), 'FILE_CHECKSUM', 'FileChecksum', 5) - token_assert_helper(lexer.token(), 'CHECKSUM', 'SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', 5) - token_assert_helper(lexer.token(), 'FILE_LICS_CONC', 'LicenseConcluded', 6) - token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 6) - token_assert_helper(lexer.token(), 'FILE_LICS_INFO', 'LicenseInfoInFile', 7) - token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 7) - token_assert_helper(lexer.token(), 'FILE_CR_TEXT', 'FileCopyrightText', 8) - token_assert_helper(lexer.token(), 'TEXT', 'Copyright 2014 Acme Inc.', 8) - token_assert_helper(lexer.token(), 'FILE_COMMENT', 'FileComment', 9) - token_assert_helper(lexer.token(), 'TEXT', 'Very long file', 9) - token_assert_helper(lexer.token(), 'FILE_ATTRIBUTION_TEXT', 'FileAttributionText', 10) - token_assert_helper(lexer.token(), 'TEXT', - 'Acknowledgements that might be required to be communicated in some contexts.', + token_assert_helper(lexer.token(), "FILE_NAME", "FileName", 1) + token_assert_helper(lexer.token(), "LINE", "testfile.java", 1) + token_assert_helper(lexer.token(), "SPDX_ID", "SPDXID", 2) + token_assert_helper(lexer.token(), "LINE", "SPDXRef-File", 2) + token_assert_helper(lexer.token(), "FILE_TYPE", "FileType", 3) + token_assert_helper(lexer.token(), "LINE", "SOURCE", 3) + token_assert_helper(lexer.token(), "FILE_TYPE", "FileType", 4) + token_assert_helper(lexer.token(), "LINE", "TEXT", 4) + token_assert_helper(lexer.token(), "FILE_CHECKSUM", "FileChecksum", 5) + token_assert_helper(lexer.token(), "CHECKSUM", "SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", 5) + token_assert_helper(lexer.token(), "FILE_LICENSE_CONCLUDED", "LicenseConcluded", 6) + token_assert_helper(lexer.token(), "LINE", "Apache-2.0", 6) + token_assert_helper(lexer.token(), "FILE_LICENSE_INFO", "LicenseInfoInFile", 7) + token_assert_helper(lexer.token(), "LINE", "Apache-2.0", 7) + token_assert_helper(lexer.token(), "FILE_COPYRIGHT_TEXT", "FileCopyrightText", 8) + token_assert_helper(lexer.token(), "TEXT", "Copyright 2014 Acme Inc.", 8) + token_assert_helper(lexer.token(), "FILE_COMMENT", "FileComment", 9) + token_assert_helper(lexer.token(), "TEXT", "Very long file", 9) + token_assert_helper(lexer.token(), "FILE_ATTRIBUTION_TEXT", "FileAttributionText", 10) + token_assert_helper(lexer.token(), "TEXT", + "Acknowledgements that might be required to be communicated in some contexts.", 10) def test_tokenization_of_creation_info(lexer): - creation_str = '\n'.join([ - 'Creator: Person: Bob (bob@example.com)', - 'Creator: Organization: Acme.', - 'Created: 2010-02-03T00:00:00Z', - 'CreatorComment: Sample Comment' + creation_str = "\n".join([ + "Creator: Person: Bob (bob@example.com)", + "Creator: Organization: Acme.", + "Created: 2010-02-03T00:00:00Z", + "CreatorComment: Sample Comment" ]) lexer.input(creation_str) - token_assert_helper(lexer.token(), 'CREATOR', 'Creator', 1) - token_assert_helper(lexer.token(), 'PERSON_VALUE', "Person: Bob (bob@example.com)", 1) - 
token_assert_helper(lexer.token(), 'CREATOR', 'Creator', 2) - token_assert_helper(lexer.token(), 'ORG_VALUE', 'Organization: Acme.', 2) - token_assert_helper(lexer.token(), 'CREATED', 'Created', 3) - token_assert_helper(lexer.token(), 'DATE', '2010-02-03T00:00:00Z', 3) - token_assert_helper(lexer.token(), 'CREATOR_COMMENT', 'CreatorComment', 4) - token_assert_helper(lexer.token(), 'TEXT', 'Sample Comment', 4) + token_assert_helper(lexer.token(), "CREATOR", "Creator", 1) + token_assert_helper(lexer.token(), "PERSON_VALUE", "Person: Bob (bob@example.com)", 1) + token_assert_helper(lexer.token(), "CREATOR", "Creator", 2) + token_assert_helper(lexer.token(), "ORGANIZATION_VALUE", "Organization: Acme.", 2) + token_assert_helper(lexer.token(), "CREATED", "Created", 3) + token_assert_helper(lexer.token(), "DATE", "2010-02-03T00:00:00Z", 3) + token_assert_helper(lexer.token(), "CREATOR_COMMENT", "CreatorComment", 4) + token_assert_helper(lexer.token(), "TEXT", "Sample Comment", 4) def test_tokenization_of_package(lexer): - package_str = '\n'.join([ - 'PackageName: Test', - 'SPDXID: SPDXRef-Package', - 'PackageVersion: Version 0.9.2', - 'PackageDownloadLocation: http://example.com/test', - 'FilesAnalyzed: True', - 'PackageSummary: Test package', - 'PackageSourceInfo: Version 1.0 of test', - 'PackageFileName: test-1.0.zip', - 'PackageSupplier: Organization:ACME', - 'PackageOriginator: Organization:ACME', - 'PackageChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', - 'PackageVerificationCode: 4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)', - 'PackageDescription: A package.', - 'PackageComment: Comment on the package.', - 'PackageCopyrightText: Copyright 2014 Acme Inc.', - 'PackageLicenseDeclared: Apache-2.0', - 'PackageLicenseConcluded: (LicenseRef-2.0 and Apache-2.0)', - 'PackageLicenseInfoFromFiles: Apache-1.0', - 'PackageLicenseInfoFromFiles: Apache-2.0', - 'PackageLicenseComments: License Comments', - 'ExternalRef: SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:', - 'ExternalRefComment: Some comment about the package.', - 'PrimaryPackagePurpose: OPERATING-SYSTEM', - 'BuiltDate: 2020-01-01T12:00:00Z', - 'ReleaseDate: 2021-01-01T12:00:00Z', - 'ValidUntilDate: 2022-01-01T12:00:00Z' + package_str = "\n".join([ + "PackageName: Test", + "SPDXID: SPDXRef-Package", + "PackageVersion: Version 0.9.2", + "PackageDownloadLocation: http://example.com/test", + "FilesAnalyzed: True", + "PackageSummary: Test package", + "PackageSourceInfo: Version 1.0 of test", + "PackageFileName: test-1.0.zip", + "PackageSupplier: Organization:ACME", + "PackageOriginator: Organization:ACME", + "PackageChecksum: SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", + "PackageVerificationCode: 4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)", + "PackageDescription: A package.", + "PackageComment: Comment on the package.", + "PackageCopyrightText: Copyright 2014 Acme Inc.", + "PackageLicenseDeclared: Apache-2.0", + "PackageLicenseConcluded: (LicenseRef-2.0 and Apache-2.0)", + "PackageLicenseInfoFromFiles: Apache-1.0", + "PackageLicenseInfoFromFiles: Apache-2.0", + "PackageLicenseComments: License Comments", + "ExternalRef: SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:", + "ExternalRefComment: Some comment about the package.", + "PrimaryPackagePurpose: OPERATING-SYSTEM", + "BuiltDate: 2020-01-01T12:00:00Z", + "ReleaseDate: 2021-01-01T12:00:00Z", + "ValidUntilDate: 2022-01-01T12:00:00Z" ]) 
lexer.input(package_str) - token_assert_helper(lexer.token(), 'PKG_NAME', 'PackageName', 1) - token_assert_helper(lexer.token(), 'LINE', 'Test', 1) - token_assert_helper(lexer.token(), 'SPDX_ID', 'SPDXID', 2) - token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-Package', 2) - token_assert_helper(lexer.token(), 'PKG_VERSION', 'PackageVersion', 3) - token_assert_helper(lexer.token(), 'LINE', 'Version 0.9.2', 3) - token_assert_helper(lexer.token(), 'PKG_DOWN', 'PackageDownloadLocation', 4) - token_assert_helper(lexer.token(), 'LINE', 'http://example.com/test', 4) - token_assert_helper(lexer.token(), 'PKG_FILES_ANALYZED', 'FilesAnalyzed', 5) - token_assert_helper(lexer.token(), 'LINE', 'True', 5) - token_assert_helper(lexer.token(), 'PKG_SUM', 'PackageSummary', 6) - token_assert_helper(lexer.token(), 'TEXT', 'Test package', 6) - token_assert_helper(lexer.token(), 'PKG_SRC_INFO', 'PackageSourceInfo', 7) - token_assert_helper(lexer.token(), 'TEXT', 'Version 1.0 of test', 7) - token_assert_helper(lexer.token(), 'PKG_FILE_NAME', 'PackageFileName', 8) - token_assert_helper(lexer.token(), 'LINE', 'test-1.0.zip', 8) - token_assert_helper(lexer.token(), 'PKG_SUPPL', 'PackageSupplier', 9) - token_assert_helper(lexer.token(), 'ORG_VALUE', 'Organization:ACME', 9) - token_assert_helper(lexer.token(), 'PKG_ORIG', 'PackageOriginator', 10) - token_assert_helper(lexer.token(), 'ORG_VALUE', 'Organization:ACME', 10) - token_assert_helper(lexer.token(), 'PKG_CHECKSUM', 'PackageChecksum', 11) - token_assert_helper(lexer.token(), 'CHECKSUM', 'SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12', 11) - token_assert_helper(lexer.token(), 'PKG_VERF_CODE', 'PackageVerificationCode', 12) - token_assert_helper(lexer.token(), 'LINE', - '4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)', 12) - token_assert_helper(lexer.token(), 'PKG_DESC', 'PackageDescription', 13) - token_assert_helper(lexer.token(), 'TEXT', 'A package.', 13) - token_assert_helper(lexer.token(), 'PKG_COMMENT', 'PackageComment', 14) - token_assert_helper(lexer.token(), 'TEXT', 'Comment on the package.', 14) - token_assert_helper(lexer.token(), 'PKG_CPY_TEXT', 'PackageCopyrightText', 15) - token_assert_helper(lexer.token(), 'TEXT', ' Copyright 2014 Acme Inc.', 15) - token_assert_helper(lexer.token(), 'PKG_LICS_DECL', 'PackageLicenseDeclared', 16) - token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 16) - token_assert_helper(lexer.token(), 'PKG_LICS_CONC', 'PackageLicenseConcluded', 17) - token_assert_helper(lexer.token(), 'LINE', '(LicenseRef-2.0 and Apache-2.0)', 17) - token_assert_helper(lexer.token(), 'PKG_LICS_FFILE', 'PackageLicenseInfoFromFiles', 18) - token_assert_helper(lexer.token(), 'LINE', 'Apache-1.0', 18) - token_assert_helper(lexer.token(), 'PKG_LICS_FFILE', 'PackageLicenseInfoFromFiles', 19) - token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 19) - token_assert_helper(lexer.token(), 'PKG_LICS_COMMENT', 'PackageLicenseComments', 20) - token_assert_helper(lexer.token(), 'TEXT', 'License Comments', 20) - token_assert_helper(lexer.token(), 'PKG_EXT_REF', 'ExternalRef', 21) - token_assert_helper(lexer.token(), 'LINE', - 'SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:', 21) - token_assert_helper(lexer.token(), 'PKG_EXT_REF_COMMENT', 'ExternalRefComment', 22) - token_assert_helper(lexer.token(), 'TEXT', 'Some comment about the package.', 22) - token_assert_helper(lexer.token(), 'PRIMARY_PACKAGE_PURPOSE', 'PrimaryPackagePurpose', 23) - token_assert_helper(lexer.token(), 'LINE', 
'OPERATING-SYSTEM', 23) - token_assert_helper(lexer.token(), 'BUILT_DATE', 'BuiltDate', 24) - token_assert_helper(lexer.token(), 'DATE', '2020-01-01T12:00:00Z', 24) - token_assert_helper(lexer.token(), 'RELEASE_DATE', 'ReleaseDate', 25) - token_assert_helper(lexer.token(), 'DATE', '2021-01-01T12:00:00Z', 25) - token_assert_helper(lexer.token(), 'VALID_UNTIL_DATE', 'ValidUntilDate', 26) - token_assert_helper(lexer.token(), 'DATE', '2022-01-01T12:00:00Z', 26) + token_assert_helper(lexer.token(), "PKG_NAME", "PackageName", 1) + token_assert_helper(lexer.token(), "LINE", "Test", 1) + token_assert_helper(lexer.token(), "SPDX_ID", "SPDXID", 2) + token_assert_helper(lexer.token(), "LINE", "SPDXRef-Package", 2) + token_assert_helper(lexer.token(), "PKG_VERSION", "PackageVersion", 3) + token_assert_helper(lexer.token(), "LINE", "Version 0.9.2", 3) + token_assert_helper(lexer.token(), "PKG_DOWWNLOAD_LOCATION", "PackageDownloadLocation", 4) + token_assert_helper(lexer.token(), "LINE", "http://example.com/test", 4) + token_assert_helper(lexer.token(), "PKG_FILES_ANALYZED", "FilesAnalyzed", 5) + token_assert_helper(lexer.token(), "LINE", "True", 5) + token_assert_helper(lexer.token(), "PKG_SUMMARY", "PackageSummary", 6) + token_assert_helper(lexer.token(), "TEXT", "Test package", 6) + token_assert_helper(lexer.token(), "PKG_SOURCE_INFO", "PackageSourceInfo", 7) + token_assert_helper(lexer.token(), "TEXT", "Version 1.0 of test", 7) + token_assert_helper(lexer.token(), "PKG_FILE_NAME", "PackageFileName", 8) + token_assert_helper(lexer.token(), "LINE", "test-1.0.zip", 8) + token_assert_helper(lexer.token(), "PKG_SUPPLIER", "PackageSupplier", 9) + token_assert_helper(lexer.token(), "ORGANIZATION_VALUE", "Organization:ACME", 9) + token_assert_helper(lexer.token(), "PKG_ORIGINATOR", "PackageOriginator", 10) + token_assert_helper(lexer.token(), "ORGANIZATION_VALUE", "Organization:ACME", 10) + token_assert_helper(lexer.token(), "PKG_CHECKSUM", "PackageChecksum", 11) + token_assert_helper(lexer.token(), "CHECKSUM", "SHA1: 2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", 11) + token_assert_helper(lexer.token(), "PKG_VERIFICATION_CODE", "PackageVerificationCode", 12) + token_assert_helper(lexer.token(), "LINE", + "4e3211c67a2d28fced849ee1bb76e7391b93feba (something.rdf, something.txt)", 12) + token_assert_helper(lexer.token(), "PKG_DESCRIPTION", "PackageDescription", 13) + token_assert_helper(lexer.token(), "TEXT", "A package.", 13) + token_assert_helper(lexer.token(), "PKG_COMMENT", "PackageComment", 14) + token_assert_helper(lexer.token(), "TEXT", "Comment on the package.", 14) + token_assert_helper(lexer.token(), "PKG_COPYRIGHT_TEXT", "PackageCopyrightText", 15) + token_assert_helper(lexer.token(), "TEXT", " Copyright 2014 Acme Inc.", 15) + token_assert_helper(lexer.token(), "PKG_LICENSE_DECLARED", "PackageLicenseDeclared", 16) + token_assert_helper(lexer.token(), "LINE", "Apache-2.0", 16) + token_assert_helper(lexer.token(), "PKG_LICENSE_CONCLUDED", "PackageLicenseConcluded", 17) + token_assert_helper(lexer.token(), "LINE", "(LicenseRef-2.0 and Apache-2.0)", 17) + token_assert_helper(lexer.token(), "PKG_LICENSE_INFO", "PackageLicenseInfoFromFiles", 18) + token_assert_helper(lexer.token(), "LINE", "Apache-1.0", 18) + token_assert_helper(lexer.token(), "PKG_LICENSE_INFO", "PackageLicenseInfoFromFiles", 19) + token_assert_helper(lexer.token(), "LINE", "Apache-2.0", 19) + token_assert_helper(lexer.token(), "PKG_LICENSE_COMMENT", "PackageLicenseComments", 20) + token_assert_helper(lexer.token(), "TEXT", "License 
Comments", 20) + token_assert_helper(lexer.token(), "PKG_EXTERNAL_REF", "ExternalRef", 21) + token_assert_helper(lexer.token(), "LINE", + "SECURITY cpe23Type cpe:2.3:a:pivotal_software:spring_framework:4.1.0:*:*:*:*:*:*:", 21) + token_assert_helper(lexer.token(), "PKG_EXTERNAL_REF_COMMENT", "ExternalRefComment", 22) + token_assert_helper(lexer.token(), "TEXT", "Some comment about the package.", 22) + token_assert_helper(lexer.token(), "PRIMARY_PACKAGE_PURPOSE", "PrimaryPackagePurpose", 23) + token_assert_helper(lexer.token(), "LINE", "OPERATING-SYSTEM", 23) + token_assert_helper(lexer.token(), "BUILT_DATE", "BuiltDate", 24) + token_assert_helper(lexer.token(), "DATE", "2020-01-01T12:00:00Z", 24) + token_assert_helper(lexer.token(), "RELEASE_DATE", "ReleaseDate", 25) + token_assert_helper(lexer.token(), "DATE", "2021-01-01T12:00:00Z", 25) + token_assert_helper(lexer.token(), "VALID_UNTIL_DATE", "ValidUntilDate", 26) + token_assert_helper(lexer.token(), "DATE", "2022-01-01T12:00:00Z", 26) def test_tokenization_of_unknown_tag(lexer): - unknown_tag_str = 'SomeUnknownTag: SomeUnknownValue' + unknown_tag_str = "SomeUnknownTag: SomeUnknownValue" lexer.input(unknown_tag_str) - token_assert_helper(lexer.token(), 'UNKNOWN_TAG', 'SomeUnknownTag', 1) - token_assert_helper(lexer.token(), 'LINE', 'SomeUnknownValue', 1) + token_assert_helper(lexer.token(), "UNKNOWN_TAG", "SomeUnknownTag", 1) + token_assert_helper(lexer.token(), "LINE", "SomeUnknownValue", 1) def test_tokenization_of_snippet(lexer): - snippet_str = '\n'.join([ - 'SnippetSPDXID: SPDXRef-Snippet', - 'SnippetLicenseComments: Some lic comment.', - 'SnippetCopyrightText: Copyright 2008-2010 John Smith ', - 'SnippetComment: Some snippet comment.', - 'SnippetName: from linux kernel', - 'SnippetFromFileSPDXID: SPDXRef-DoapSource', - 'SnippetLicenseConcluded: Apache-2.0', - 'LicenseInfoInSnippet: Apache-2.0', - 'SnippetByteRange: 310:420', - 'SnippetLineRange: 5:23', + snippet_str = "\n".join([ + "SnippetSPDXID: SPDXRef-Snippet", + "SnippetLicenseComments: Some lic comment.", + "SnippetCopyrightText: Copyright 2008-2010 John Smith ", + "SnippetComment: Some snippet comment.", + "SnippetName: from linux kernel", + "SnippetFromFileSPDXID: SPDXRef-DoapSource", + "SnippetLicenseConcluded: Apache-2.0", + "LicenseInfoInSnippet: Apache-2.0", + "SnippetByteRange: 310:420", + "SnippetLineRange: 5:23", ]) lexer.input(snippet_str) - token_assert_helper(lexer.token(), 'SNIPPET_SPDX_ID', 'SnippetSPDXID', 1) - token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-Snippet', 1) - token_assert_helper(lexer.token(), 'SNIPPET_LICS_COMMENT', 'SnippetLicenseComments', 2) - token_assert_helper(lexer.token(), 'TEXT', 'Some lic comment.', 2) - token_assert_helper(lexer.token(), 'SNIPPET_CR_TEXT', 'SnippetCopyrightText', 3) - token_assert_helper(lexer.token(), 'TEXT', ' Copyright 2008-2010 John Smith ', 3) - token_assert_helper(lexer.token(), 'SNIPPET_COMMENT', 'SnippetComment', 4) - token_assert_helper(lexer.token(), 'TEXT', 'Some snippet comment.', 4) - token_assert_helper(lexer.token(), 'SNIPPET_NAME', 'SnippetName', 5) - token_assert_helper(lexer.token(), 'LINE', 'from linux kernel', 5) - token_assert_helper(lexer.token(), 'SNIPPET_FILE_SPDXID', 'SnippetFromFileSPDXID', 6) - token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-DoapSource', 6) - token_assert_helper(lexer.token(), 'SNIPPET_LICS_CONC', - 'SnippetLicenseConcluded', 7) - token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 7) - token_assert_helper(lexer.token(), 'SNIPPET_LICS_INFO', 
'LicenseInfoInSnippet', 8) - token_assert_helper(lexer.token(), 'LINE', 'Apache-2.0', 8) - token_assert_helper(lexer.token(), 'SNIPPET_BYTE_RANGE', 'SnippetByteRange', 9) - token_assert_helper(lexer.token(), 'LINE', '310:420', 9) - token_assert_helper(lexer.token(), 'SNIPPET_LINE_RANGE', 'SnippetLineRange', 10) - token_assert_helper(lexer.token(), 'LINE', '5:23', 10) + token_assert_helper(lexer.token(), "SNIPPET_SPDX_ID", "SnippetSPDXID", 1) + token_assert_helper(lexer.token(), "LINE", "SPDXRef-Snippet", 1) + token_assert_helper(lexer.token(), "SNIPPET_LICENSE_COMMENT", "SnippetLicenseComments", 2) + token_assert_helper(lexer.token(), "TEXT", "Some lic comment.", 2) + token_assert_helper(lexer.token(), "SNIPPET_COPYRIGHT_TEXT", "SnippetCopyrightText", 3) + token_assert_helper(lexer.token(), "TEXT", " Copyright 2008-2010 John Smith ", 3) + token_assert_helper(lexer.token(), "SNIPPET_COMMENT", "SnippetComment", 4) + token_assert_helper(lexer.token(), "TEXT", "Some snippet comment.", 4) + token_assert_helper(lexer.token(), "SNIPPET_NAME", "SnippetName", 5) + token_assert_helper(lexer.token(), "LINE", "from linux kernel", 5) + token_assert_helper(lexer.token(), "SNIPPET_FILE_SPDXID", "SnippetFromFileSPDXID", 6) + token_assert_helper(lexer.token(), "LINE", "SPDXRef-DoapSource", 6) + token_assert_helper(lexer.token(), "SNIPPET_LICENSE_CONCLUDED", + "SnippetLicenseConcluded", 7) + token_assert_helper(lexer.token(), "LINE", "Apache-2.0", 7) + token_assert_helper(lexer.token(), "SNIPPET_LICENSE_INFO", "LicenseInfoInSnippet", 8) + token_assert_helper(lexer.token(), "LINE", "Apache-2.0", 8) + token_assert_helper(lexer.token(), "SNIPPET_BYTE_RANGE", "SnippetByteRange", 9) + token_assert_helper(lexer.token(), "LINE", "310:420", 9) + token_assert_helper(lexer.token(), "SNIPPET_LINE_RANGE", "SnippetLineRange", 10) + token_assert_helper(lexer.token(), "LINE", "5:23", 10) def test_tokenization_of_annotation(lexer): - annotation_str = '\n'.join([ - 'Annotator: Person: Jane Doe()', - 'AnnotationDate: 2010-01-29T18:30:22Z', - 'AnnotationComment: Document level annotation', - 'AnnotationType: OTHER', - 'SPDXREF: SPDXRef-DOCUMENT' + annotation_str = "\n".join([ + "Annotator: Person: Jane Doe()", + "AnnotationDate: 2010-01-29T18:30:22Z", + "AnnotationComment: Document level annotation", + "AnnotationType: OTHER", + "SPDXREF: SPDXRef-DOCUMENT" ]) lexer.input(annotation_str) - token_assert_helper(lexer.token(), 'ANNOTATOR', 'Annotator', 1) - token_assert_helper(lexer.token(), 'PERSON_VALUE', 'Person: Jane Doe()', 1) - token_assert_helper(lexer.token(), 'ANNOTATION_DATE', 'AnnotationDate', 2) - token_assert_helper(lexer.token(), 'DATE', '2010-01-29T18:30:22Z', 2) - token_assert_helper(lexer.token(), 'ANNOTATION_COMMENT', 'AnnotationComment', 3) - token_assert_helper(lexer.token(), 'TEXT', 'Document level annotation', 3) - token_assert_helper(lexer.token(), 'ANNOTATION_TYPE', 'AnnotationType', 4) - token_assert_helper(lexer.token(), 'LINE', 'OTHER', 4) - token_assert_helper(lexer.token(), 'ANNOTATION_SPDX_ID', 'SPDXREF', 5) - token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-DOCUMENT', 5) + token_assert_helper(lexer.token(), "ANNOTATOR", "Annotator", 1) + token_assert_helper(lexer.token(), "PERSON_VALUE", "Person: Jane Doe()", 1) + token_assert_helper(lexer.token(), "ANNOTATION_DATE", "AnnotationDate", 2) + token_assert_helper(lexer.token(), "DATE", "2010-01-29T18:30:22Z", 2) + token_assert_helper(lexer.token(), "ANNOTATION_COMMENT", "AnnotationComment", 3) + token_assert_helper(lexer.token(), "TEXT", "Document 
level annotation", 3) + token_assert_helper(lexer.token(), "ANNOTATION_TYPE", "AnnotationType", 4) + token_assert_helper(lexer.token(), "LINE", "OTHER", 4) + token_assert_helper(lexer.token(), "ANNOTATION_SPDX_ID", "SPDXREF", 5) + token_assert_helper(lexer.token(), "LINE", "SPDXRef-DOCUMENT", 5) def test_tokenization_of_relationship(lexer): - relationship_str = '\n'.join(['Relationship: SPDXRef-DOCUMENT DESCRIBES NONE', - 'RelationshipComment: This is a comment.']) + relationship_str = "\n".join(["Relationship: SPDXRef-DOCUMENT DESCRIBES NONE", + "RelationshipComment: This is a comment."]) lexer.input(relationship_str) - token_assert_helper(lexer.token(), 'RELATIONSHIP', 'Relationship', 1) - token_assert_helper(lexer.token(), 'LINE', 'SPDXRef-DOCUMENT DESCRIBES NONE', 1) - token_assert_helper(lexer.token(), 'RELATIONSHIP_COMMENT', 'RelationshipComment', 2) - token_assert_helper(lexer.token(), 'LINE', 'This is a comment.', 2) + token_assert_helper(lexer.token(), "RELATIONSHIP", "Relationship", 1) + token_assert_helper(lexer.token(), "LINE", "SPDXRef-DOCUMENT DESCRIBES NONE", 1) + token_assert_helper(lexer.token(), "RELATIONSHIP_COMMENT", "RelationshipComment", 2) + token_assert_helper(lexer.token(), "LINE", "This is a comment.", 2) diff --git a/tests/spdx/parser/tagvalue/test_tag_value_parser.py b/tests/spdx/parser/tagvalue/test_tag_value_parser.py index 38755a24a..f194af2a7 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_parser.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_parser.py @@ -16,13 +16,13 @@ from spdx.model.document import Document from spdx.model.relationship import RelationshipType, Relationship from spdx.parser.error import SPDXParsingError -from spdx.parser.tagvalue.parser.tagvalue import Parser +from spdx.parser.tagvalue.parser import Parser from tests.spdx.parser.tagvalue.test_creation_info_parser import DOCUMENT_STR def test_parse_unknown_tag(): parser = Parser() - unknown_tag_str = 'UnknownTag: This is an example for an unknown tag.' + unknown_tag_str = "UnknownTag: This is an example for an unknown tag." 
with pytest.raises(SPDXParsingError, match="Unknown tag"): parser.parse(unknown_tag_str) diff --git a/tests/spdx/writer/tagvalue/test_tagvalue_writer.py b/tests/spdx/writer/tagvalue/test_tagvalue_writer.py index f4af5a505..f289be6de 100644 --- a/tests/spdx/writer/tagvalue/test_tagvalue_writer.py +++ b/tests/spdx/writer/tagvalue/test_tagvalue_writer.py @@ -13,7 +13,7 @@ import pytest -from spdx.parser.tagvalue.parser import tagvalue_parser +from spdx.parser.tagvalue import tagvalue_parser from tests.spdx.fixtures import document_fixture from spdx.writer.tagvalue.tagvalue_writer import write_document_to_file From 92379a6cc4ec7669b90cf812e402f7f0aad946ea Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Thu, 9 Mar 2023 08:23:41 +0100 Subject: [PATCH 42/43] squashed review commits with name fixes and comment improvement Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/helper_methods.py | 4 ++-- src/spdx/parser/tagvalue/lexer.py | 2 +- src/spdx/parser/tagvalue/parser.py | 21 +++++++++---------- .../parser/tagvalue/test_tag_value_lexer.py | 2 +- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/spdx/parser/tagvalue/helper_methods.py b/src/spdx/parser/tagvalue/helper_methods.py index f13204b78..26efc1407 100644 --- a/src/spdx/parser/tagvalue/helper_methods.py +++ b/src/spdx/parser/tagvalue/helper_methods.py @@ -57,7 +57,7 @@ def parse_checksum(checksum_str: str) -> Checksum: def set_value(parsed_value: YaccProduction, dict_to_fill: Dict[str, Any], argument_name: Optional[str] = None, method_to_apply: Callable = lambda x: x): if not argument_name: - argument_name = get_property(parsed_value[1]) + argument_name = get_property_name(parsed_value[1]) if argument_name in dict_to_fill: dict_to_fill["logger"].append( f"Multiple values for {parsed_value[1]} found. Line: {parsed_value.lineno(1)}") @@ -72,7 +72,7 @@ def set_value(parsed_value: YaccProduction, dict_to_fill: Dict[str, Any], argume dict_to_fill["logger"].append(f"Invalid {parsed_value[1]}: {parsed_value[2]}. 
Line: {parsed_value.lineno(1)}") -def get_property(tag: str): +def get_property_name(tag: str): if tag not in TAG_DATA_MODEL_FIELD.keys(): return camel_case_to_snake_case(tag) return TAG_DATA_MODEL_FIELD[tag][1] diff --git a/src/spdx/parser/tagvalue/lexer.py b/src/spdx/parser/tagvalue/lexer.py index 906e26067..b6dfca3bf 100644 --- a/src/spdx/parser/tagvalue/lexer.py +++ b/src/spdx/parser/tagvalue/lexer.py @@ -41,7 +41,7 @@ class SPDXLexer(object): # Package fields "PackageName": "PKG_NAME", "PackageVersion": "PKG_VERSION", - "PackageDownloadLocation": "PKG_DOWWNLOAD_LOCATION", + "PackageDownloadLocation": "PKG_DOWNLOAD_LOCATION", "FilesAnalyzed": "PKG_FILES_ANALYZED", "PackageSummary": "PKG_SUMMARY", "PackageSourceInfo": "PKG_SOURCE_INFO", diff --git a/src/spdx/parser/tagvalue/parser.py b/src/spdx/parser/tagvalue/parser.py index 8fbc3d019..573eede02 100644 --- a/src/spdx/parser/tagvalue/parser.py +++ b/src/spdx/parser/tagvalue/parser.py @@ -119,7 +119,7 @@ def p_attrib(self, p): "pkg_license_concluded : PKG_LICENSE_CONCLUDED error\n source_info : PKG_SOURCE_INFO error\n " "homepage : PKG_HOMEPAGE error\n pkg_checksum : PKG_CHECKSUM error\n " "verification_code : PKG_VERIFICATION_CODE error\n originator : PKG_ORIGINATOR error\n " - "download_location : PKG_DOWWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n " + "download_location : PKG_DOWNLOAD_LOCATION error\n files_analyzed : PKG_FILES_ANALYZED error\n " "supplier : PKG_SUPPLIER error\n pkg_file_name : PKG_FILE_NAME error\n " "package_version : PKG_VERSION error\n primary_package_purpose : PRIMARY_PACKAGE_PURPOSE error\n " "built_date : BUILT_DATE error\n release_date : RELEASE_DATE error\n " @@ -150,7 +150,7 @@ def p_current_element_error(self, p): "package_name : PKG_NAME LINE\n description : PKG_DESCRIPTION text_or_line\n " "summary : PKG_SUMMARY text_or_line\n source_info : PKG_SOURCE_INFO text_or_line\n " "homepage : PKG_HOMEPAGE line_or_no_assertion_or_none\n " - "download_location : PKG_DOWWNLOAD_LOCATION line_or_no_assertion_or_none\n " + "download_location : PKG_DOWNLOAD_LOCATION line_or_no_assertion_or_none\n " "originator : PKG_ORIGINATOR actor_or_no_assertion\n supplier : PKG_SUPPLIER actor_or_no_assertion\n " "pkg_comment : PKG_COMMENT text_or_line\n " "pkg_copyright_text : PKG_COPYRIGHT_TEXT line_or_no_assertion_or_none\n " @@ -165,9 +165,7 @@ def p_current_element_error(self, p): "file_spdx_id : SNIPPET_FILE_SPDXID LINE\n " "snippet_license_concluded : SNIPPET_LICENSE_CONCLUDED license_or_no_assertion_or_none\n " "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n " - "annotation_comment : ANNOTATION_COMMENT text_or_line\n " - - ) + "annotation_comment : ANNOTATION_COMMENT text_or_line") def p_generic_value(self, p): if p[1] in EXPECTED_START_TAG_ELEMENT.keys(): self.initialize_new_current_element(EXPECTED_START_TAG_ELEMENT[p[1]]) @@ -206,8 +204,9 @@ def p_actor_values(self, p): @grammar_rule("spdx_id : SPDX_ID LINE") def p_spdx_id(self, p): - # We assume that the documents spdx_id is defined first in the SPDXDocument, before any package or file - # information. If this is not the case the parser will behave unexpectedly as the spdx_ids are assigned falsy. + # As all SPDX Ids share the same tag, there is no knowing which spdx_id belongs to the document. + # We assume that to be the first spdx_id we encounter. As the specification does not explicitly require this, + # our approach might lead to unwanted behavior when the document's SPDX Id is defined later in the document. 
if "spdx_id" in self.creation_info: self.current_element["spdx_id"] = p[2] else: @@ -302,7 +301,8 @@ def p_pkg_attribution_text(self, p): self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)) self.current_element.setdefault("attribution_texts", []).append(p[2]) - @grammar_rule("pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE") + @grammar_rule( + "pkg_external_ref : PKG_EXTERNAL_REF LINE PKG_EXTERNAL_REF_COMMENT text_or_line\n | PKG_EXTERNAL_REF LINE") def p_pkg_external_refs(self, p): if not self.check_that_current_element_matches_class_for_value(Package, p.lineno(1)): return @@ -516,9 +516,8 @@ def check_that_current_element_matches_class_for_value(self, expected_class, lin def construct_current_element(self): if "class" not in self.current_element: - # When the first element of the document is instantiated we don't have a current element in scope - # and the key "class" doesn't exist. Additionally, if the first element doesn't have the expected start - # value the key "class" wouldn't exist. To prevent a KeyError we use early return. + # This happens when the first element is initialized via initialize_new_current_element() or if the first + # element is missing its expected starting tag. In both cases we are unable to construct an element. return clazz = self.current_element.pop("class") diff --git a/tests/spdx/parser/tagvalue/test_tag_value_lexer.py b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py index 4d4b2cdb9..afef98f1b 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_lexer.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py @@ -159,7 +159,7 @@ def test_tokenization_of_package(lexer): token_assert_helper(lexer.token(), "LINE", "SPDXRef-Package", 2) token_assert_helper(lexer.token(), "PKG_VERSION", "PackageVersion", 3) token_assert_helper(lexer.token(), "LINE", "Version 0.9.2", 3) - token_assert_helper(lexer.token(), "PKG_DOWWNLOAD_LOCATION", "PackageDownloadLocation", 4) + token_assert_helper(lexer.token(), "PKG_DOWNLOAD_LOCATION", "PackageDownloadLocation", 4) token_assert_helper(lexer.token(), "LINE", "http://example.com/test", 4) token_assert_helper(lexer.token(), "PKG_FILES_ANALYZED", "FilesAnalyzed", 5) token_assert_helper(lexer.token(), "LINE", "True", 5) From 97a8de448b9243c07f2358b47964ab8ea6b69230 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Thu, 9 Mar 2023 08:39:21 +0100 Subject: [PATCH 43/43] squashed review commits [review] fix parsing of external document ref [review] use only one dictionary [review] return if multiple values for snippet range found Signed-off-by: Meret Behrens --- src/spdx/parser/tagvalue/helper_methods.py | 1 + src/spdx/parser/tagvalue/lexer.py | 20 +-------- src/spdx/parser/tagvalue/parser.py | 45 +++++++++---------- .../tagvalue/test_creation_info_parser.py | 30 +++++++------ .../tagvalue/test_relationship_parser.py | 2 +- .../parser/tagvalue/test_tag_value_lexer.py | 22 +++++---- 6 files changed, 55 insertions(+), 65 deletions(-) diff --git a/src/spdx/parser/tagvalue/helper_methods.py b/src/spdx/parser/tagvalue/helper_methods.py index 26efc1407..3f7ad0012 100644 --- a/src/spdx/parser/tagvalue/helper_methods.py +++ b/src/spdx/parser/tagvalue/helper_methods.py @@ -105,6 +105,7 @@ def get_property_name(tag: str): "SnippetComment": (Snippet, "comment"), "SnippetCopyrightText": (Snippet, "copyright_text"), "SnippetLicenseComments": (Snippet, "license_comment"), "SnippetLicenseConcluded": (Snippet, "license_concluded"), "SnippetByteRange": (Snippet, 
"byte_range"), "SnippetLineRange": (Snippet, "line_range"), + "Annotator": (Annotation, "annotator"), "SPDXREF": (Annotation, "spdx_id"), "AnnotationComment": (Annotation, "annotation_comment"), "LicenseID": (ExtractedLicensingInfo, "license_id"), "ExtractedText": (ExtractedLicensingInfo, "extracted_text"), "LicenseComment": (ExtractedLicensingInfo, "comment"), "LicenseName": (ExtractedLicensingInfo, "license_name") diff --git a/src/spdx/parser/tagvalue/lexer.py b/src/spdx/parser/tagvalue/lexer.py index b6dfca3bf..9229ad64e 100644 --- a/src/spdx/parser/tagvalue/lexer.py +++ b/src/spdx/parser/tagvalue/lexer.py @@ -109,10 +109,7 @@ class SPDXLexer(object): "PERSON_VALUE", "DATE", "LINE", - "CHECKSUM", - "EXT_DOC_REF_ID", - "EXT_DOC_URI", - "EXT_DOC_REF_CHECKSUM", + "CHECKSUM" ] + list(reserved.values()) def __init__(self): @@ -145,21 +142,6 @@ def t_CHECKSUM(self, t): t.value = t.value[1:].strip() return t - @TOKEN(r":\s*DocumentRef-([A-Za-z0-9\+\.\-]+)") - def t_EXT_DOC_REF_ID(self, t): - t.value = t.value[1:].strip() - return t - - @TOKEN(r"\s*((ht|f)tps?:\/\/\S*)") - def t_EXT_DOC_URI(self, t): - t.value = t.value.strip() - return t - - @TOKEN(r"\s*SHA1:\s*[a-f0-9]{40}") - def t_EXT_DOC_REF_CHECKSUM(self, t): - t.value = t.value[1:].strip() - return t - @TOKEN(r":\s*Tool:.+") def t_TOOL_VALUE(self, t): t.value = t.value[1:].strip() diff --git a/src/spdx/parser/tagvalue/parser.py b/src/spdx/parser/tagvalue/parser.py index 573eede02..c1f68dbfc 100644 --- a/src/spdx/parser/tagvalue/parser.py +++ b/src/spdx/parser/tagvalue/parser.py @@ -42,9 +42,6 @@ Package="packages", ExtractedLicensingInfo="extracted_licensing_info") ELEMENT_EXPECTED_START_TAG = dict(File="FileName", Annotation="Annotator", Relationship="Relationship", Snippet="SnippetSPDXID", Package="PackageName", ExtractedLicensingInfo="LicenseID") -EXPECTED_START_TAG_ELEMENT = {"FileName": File, "PackageName": Package, "Annotator": Annotation, - "Relationship": Relationship, "SnippetSPDXID": Snippet, - "LicenseID": ExtractedLicensingInfo} class Parser(object): @@ -135,8 +132,8 @@ def p_attrib(self, p): "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n " "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error") def p_current_element_error(self, p): - if p[1] in EXPECTED_START_TAG_ELEMENT.keys(): - self.initialize_new_current_element(EXPECTED_START_TAG_ELEMENT[p[1]]) + if p[1] in ELEMENT_EXPECTED_START_TAG.values(): + self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0]) self.current_element["logger"].append( f"Error while parsing {p[1]}: Token did not match specified grammar rule. 
Line: {p.lineno(1)}") @@ -167,8 +164,8 @@ def p_current_element_error(self, p): "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n " "annotation_comment : ANNOTATION_COMMENT text_or_line") def p_generic_value(self, p): - if p[1] in EXPECTED_START_TAG_ELEMENT.keys(): - self.initialize_new_current_element(EXPECTED_START_TAG_ELEMENT[p[1]]) + if p[1] in ELEMENT_EXPECTED_START_TAG.values(): + self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[p[1]][0]) if self.check_that_current_element_matches_class_for_value(TAG_DATA_MODEL_FIELD[p[1]][0], p.lineno(1)): set_value(p, self.current_element) @@ -232,11 +229,22 @@ def p_generic_value_creation_info(self, p): def p_license_list_version(self, p): set_value(p, self.creation_info, method_to_apply=Version.from_string) - @grammar_rule("ext_doc_ref : EXT_DOC_REF EXT_DOC_REF_ID EXT_DOC_URI EXT_DOC_REF_CHECKSUM") + @grammar_rule("ext_doc_ref : EXT_DOC_REF LINE") def p_external_document_ref(self, p): - document_ref_id = p[2] - document_uri = p[3] - checksum = parse_checksum(p[4]) + external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})") + external_doc_ref_match = external_doc_ref_regex.match(p[2]) + if not external_doc_ref_match: + self.creation_info["logger"].append( + f"Error while parsing ExternalDocumentRef: Couldn\'t match Checksum. Line: {p.lineno(1)}") + return + try: + document_ref_id, document_uri = external_doc_ref_match.group(1).strip().split(" ") + except ValueError: + self.creation_info["logger"].append( + f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into " + f"document_ref_id and document_uri. Line: {p.lineno(1)}") + return + checksum = parse_checksum(external_doc_ref_match.group(2).strip()) external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum) self.creation_info.setdefault("external_document_refs", []).append(external_document_ref) @@ -415,6 +423,7 @@ def p_snippet_range(self, p): if argument_name in self.current_element: self.current_element["logger"].append( f"Multiple values for {p[1]} found. Line: {p.lineno(1)}") + return range_re = re.compile(r"^(\d+):(\d+)$", re.UNICODE) if not range_re.match(p[2].strip()): self.current_element["logger"].append(f"Value for {p[1]} doesn't match valid range pattern. 
" @@ -443,8 +452,8 @@ def p_annotation_type(self, p): # parsing methods for relationship - @grammar_rule("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n " - "| RELATIONSHIP relationship_value") + @grammar_rule("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n " + "| RELATIONSHIP LINE") def p_relationship(self, p): self.initialize_new_current_element(Relationship) try: @@ -467,16 +476,6 @@ def p_relationship(self, p): if len(p) == 5: self.current_element["comment"] = p[4] - @grammar_rule("relationship_value : EXT_DOC_REF_ID LINE") - def p_relationship_value_with_doc_ref(self, p): - - p[0] = p[1] + ":" + p[2] - - @grammar_rule("relationship_value : LINE") - def p_relationship_value_without_doc_ref(self, p): - - p[0] = p[1] - def p_error(self, p): pass diff --git a/tests/spdx/parser/tagvalue/test_creation_info_parser.py b/tests/spdx/parser/tagvalue/test_creation_info_parser.py index 2d789229b..98971e24c 100644 --- a/tests/spdx/parser/tagvalue/test_creation_info_parser.py +++ b/tests/spdx/parser/tagvalue/test_creation_info_parser.py @@ -69,20 +69,22 @@ def test_parse_creation_info(): "Creator: Person Bob (bob@example.com)", "Creator: Organization: Acme [email]", "Created: 2010-02-03T00:00:0Z", "CreatorComment: Sample Comment", "LicenseListVersion: 7"]), - "Error while parsing CreationInfo: ['Error while parsing DocumentNamespace: " - "Token did not match specified grammar rule. Line: 6', 'Error while parsing " - "ExternalDocumentRef: Token did not match specified grammar rule. Line: 7', " - "'Error while parsing Creator: Token did not match specified grammar rule. Line: 8', " - "'Error while parsing Created: Token did not match specified grammar rule. Line: 10', " - "'7 is not a valid version string']"), - ("\n".join( - ["SPDXVersion: SPDX-2.3", "DataLicense: CC0-1.0", "DocumentName: Sample_Document-V2.3", - "SPDXID: SPDXRef-DOCUMENT"]), - r"__init__() missing 3 required positional arguments: 'document_namespace', " - r"'creators', and 'created'"), - ("LicenseListVersion: 3.5\nLicenseListVersion: 3.7", - "Error while parsing CreationInfo: ['Multiple values for LicenseListVersion found. " - "Line: 2']")])) + ("Error while parsing CreationInfo: ['Error while parsing DocumentNamespace: " + 'Token did not match specified grammar rule. Line: 6\', "Error while parsing ' + "ExternalDocumentRef: Couldn't split the first part of the value into " + 'document_ref_id and document_uri. Line: 7", \'Error while parsing Creator: ' + "Token did not match specified grammar rule. Line: 8', 'Error while parsing " + "Created: Token did not match specified grammar rule. Line: 10', '7 is not a " + "valid version string']")), + ("\n".join( + ["SPDXVersion: SPDX-2.3", "DataLicense: CC0-1.0", "DocumentName: Sample_Document-V2.3", + "SPDXID: SPDXRef-DOCUMENT"]), + r"__init__() missing 3 required positional arguments: 'document_namespace', 'creators', and 'created'"), + ("LicenseListVersion: 3.5\nLicenseListVersion: 3.7", + "Error while parsing CreationInfo: ['Multiple values for LicenseListVersion found. Line: 2']"), + ("ExternalDocumentRef: Document_ref document_uri SHA1: afded", + 'Error while parsing CreationInfo: ["Error while parsing ExternalDocumentRef: Couldn\'t match Checksum. 
Line: 1"]' + )])) def test_parse_invalid_creation_info(document_str, expected_message): parser = Parser() with pytest.raises(SPDXParsingError) as err: diff --git a/tests/spdx/parser/tagvalue/test_relationship_parser.py b/tests/spdx/parser/tagvalue/test_relationship_parser.py index 18a6ee3b8..90ef9da4e 100644 --- a/tests/spdx/parser/tagvalue/test_relationship_parser.py +++ b/tests/spdx/parser/tagvalue/test_relationship_parser.py @@ -28,7 +28,7 @@ SpdxNoAssertion())), ("Relationship: SPDXRef-CarolCompression DEPENDS_ON NONE", Relationship("SPDXRef-CarolCompression", RelationshipType.DEPENDS_ON, SpdxNone())), - ("Relationship: DocumentRef-ExternalDocument: SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef", + ("Relationship: DocumentRef-ExternalDocument:SPDXRef-Test DEPENDS_ON DocumentRef:AnotherRef", Relationship("DocumentRef-ExternalDocument:SPDXRef-Test", RelationshipType.DEPENDS_ON, "DocumentRef:AnotherRef")) ]) diff --git a/tests/spdx/parser/tagvalue/test_tag_value_lexer.py b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py index afef98f1b..0aaf0d864 100644 --- a/tests/spdx/parser/tagvalue/test_tag_value_lexer.py +++ b/tests/spdx/parser/tagvalue/test_tag_value_lexer.py @@ -53,15 +53,18 @@ def test_tokenization_of_document(lexer): def test_tokenization_of_external_document_references(lexer): - data = """ - ExternalDocumentRef:DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759 - """ + data = "\n".join([ + "ExternalDocumentRef:DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + "ExternalDocumentRef:DocumentRef-spdx-tool-2.1 ldap://[2001:db8::7]/c=GB?objectClass?one SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759"]) lexer.input(data) + token_assert_helper(lexer.token(), "EXT_DOC_REF", "ExternalDocumentRef", 1) + token_assert_helper(lexer.token(), "LINE", + "DocumentRef-spdx-tool-2.1 http://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + 1) token_assert_helper(lexer.token(), "EXT_DOC_REF", "ExternalDocumentRef", 2) - token_assert_helper(lexer.token(), "EXT_DOC_REF_ID", "DocumentRef-spdx-tool-2.1", 2) - token_assert_helper(lexer.token(), "EXT_DOC_URI", "http://spdx.org/spdxdocs/spdx-tools-v2.1-3F25" - "04E0-4F89-41D3-9A0C-0305E82C3301", 2) - token_assert_helper(lexer.token(), "EXT_DOC_REF_CHECKSUM", "SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", 2) + token_assert_helper(lexer.token(), "LINE", + "DocumentRef-spdx-tool-2.1 ldap://[2001:db8::7]/c=GB?objectClass?one SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", + 2) def test_tokenization_of_file(lexer): @@ -277,10 +280,13 @@ def test_tokenization_of_annotation(lexer): def test_tokenization_of_relationship(lexer): relationship_str = "\n".join(["Relationship: SPDXRef-DOCUMENT DESCRIBES NONE", - "RelationshipComment: This is a comment."]) + "RelationshipComment: This is a comment.", + "Relationship: DocumentRef-extern:SPDXRef-Package DESCRIBES NONE"]) lexer.input(relationship_str) token_assert_helper(lexer.token(), "RELATIONSHIP", "Relationship", 1) token_assert_helper(lexer.token(), "LINE", "SPDXRef-DOCUMENT DESCRIBES NONE", 1) token_assert_helper(lexer.token(), "RELATIONSHIP_COMMENT", "RelationshipComment", 2) token_assert_helper(lexer.token(), "LINE", "This is a comment.", 2) + token_assert_helper(lexer.token(), "RELATIONSHIP", "Relationship", 3) + 
token_assert_helper(lexer.token(), "LINE", "DocumentRef-extern:SPDXRef-Package DESCRIBES NONE", 3)