spdx · meretp · Mar 9, 2023 · Feb 21, 2023 · Feb 27, 2023 · Feb 27, 2023
diff --git a/.gitignore b/.gitignore
@@ -4,7 +4,7 @@ __pycache__/
 /build/
 /dist/
 /tmp/
-spdx/parsers/parsetab.py
+src/spdx/parser/tagvalue/parsetab.py
 /.cache/
 
 .tox

diff --git a/README.md b/README.md
@@ -131,6 +131,7 @@ if not validation_messages:
 * PyYAML: https://pypi.org/project/PyYAML/ for handling YAML.
 * xmltodict: https://pypi.org/project/xmltodict/ for handling XML.
 * rdflib: https://pypi.python.org/pypi/rdflib/ for handling RDF.
+* ply: https://pypi.org/project/ply/ for handling tag-value.
 * click: https://pypi.org/project/click/ for creating the CLI interface.
 * typeguard: https://pypi.org/project/typeguard/ for type checking.
 * uritools: https://pypi.org/project/uritools/ for validation of URIs.

diff --git a/pyproject.toml b/pyproject.toml
@@ -24,7 +24,7 @@ classifiers = [
 ]
 urls = { Homepage = "https://github.com/spdx/tools-python" }
 requires-python = ">=3.7"
-dependencies = ["click", "pyyaml", "xmltodict", "rdflib", "typeguard", "uritools", "license_expression"]
+dependencies = ["click", "pyyaml", "xmltodict", "rdflib", "typeguard", "uritools", "license_expression", "ply"]
 dynamic = ["version"]
 
 [project.optional-dependencies]

diff --git a/src/spdx/model/package.py b/src/spdx/model/package.py
@@ -55,9 +55,9 @@ class ExternalPackageRefCategory(Enum):
 
 
 CATEGORY_TO_EXTERNAL_PACKAGE_REF_TYPES: Dict[ExternalPackageRefCategory, List[str]] = {
-    ExternalPackageRefCategory.SECURITY : ["cpe22Type", "cpe23Type", "advisory", "fix", "url", "swid"],
-    ExternalPackageRefCategory.PACKAGE_MANAGER : ["maven-central", "npm", "nuget", "bower", "purl"],
-    ExternalPackageRefCategory.PERSISTENT_ID : ["swh", "gitoid"],
+    ExternalPackageRefCategory.SECURITY: ["cpe22Type", "cpe23Type", "advisory", "fix", "url", "swid"],
+    ExternalPackageRefCategory.PACKAGE_MANAGER: ["maven-central", "npm", "nuget", "bower", "purl"],
+    ExternalPackageRefCategory.PERSISTENT_ID: ["swh", "gitoid"],
     ExternalPackageRefCategory.OTHER: []
 }
 

diff --git a/src/spdx/parser/actor_parser.py b/src/spdx/parser/actor_parser.py
@@ -29,14 +29,20 @@ def parse_actor(actor: str) -> Actor:
 
         if tool_match:
             name: str = tool_match.group(1).strip()
+            if not name:
+                raise SPDXParsingError([f"No name for Tool provided: {actor}."])
             creator = construct_or_raise_parsing_error(Actor, dict(actor_type=ActorType.TOOL, name=name))
 
         elif person_match:
             name: str = person_match.group(1).strip()
+            if not name:
+                raise SPDXParsingError([f"No name for Person provided: {actor}."])
             email: Optional[str] = ActorParser.get_email_or_none(person_match)
             creator = construct_or_raise_parsing_error(Actor, dict(actor_type=ActorType.PERSON, name=name, email=email))
         elif org_match:
             name: str = org_match.group(1).strip()
+            if not name:
+                raise SPDXParsingError([f"No name for Organization provided: {actor}."])
             email: Optional[str] = ActorParser.get_email_or_none(org_match)
             creator = construct_or_raise_parsing_error(Actor,
                                                        dict(actor_type=ActorType.ORGANIZATION, name=name, email=email))

diff --git a/src/spdx/parser/parse_anything.py b/src/spdx/parser/parse_anything.py
@@ -11,6 +11,7 @@
 from spdx.formats import file_name_to_format, FileFormat
 from spdx.parser.json import json_parser
 from spdx.parser.rdf import rdf_parser
+from spdx.parser.tagvalue import tagvalue_parser
 from spdx.parser.xml import xml_parser
 from spdx.parser.yaml import yaml_parser
 
@@ -20,7 +21,7 @@ def parse_file(file_name: str):
     if input_format == FileFormat.RDF_XML:
         return rdf_parser.parse_from_file(file_name)
     elif input_format == FileFormat.TAG_VALUE:
-        raise NotImplementedError("Currently, the tag-value parser is not implemented")
+        return tagvalue_parser.parse_from_file(file_name)
     elif input_format == FileFormat.JSON:
         return json_parser.parse_from_file(file_name)
     elif input_format == FileFormat.XML:

diff --git a/src/spdx/parser/parsing_functions.py b/src/spdx/parser/parsing_functions.py
@@ -20,6 +20,8 @@ def construct_or_raise_parsing_error(object_to_construct: Any, args_for_construc
         constructed_object = object_to_construct(**args_for_construction)
     except ConstructorTypeErrors as err:
         raise SPDXParsingError([f"Error while constructing {object_to_construct.__name__}: {err.get_messages()}"])
+    except TypeError as err:
+        raise SPDXParsingError([f"Error while constructing {object_to_construct.__name__}: {err.args[0]}"])
     return constructed_object
 
 

diff --git a/src/spdx/parser/tagvalue/__init__.py b/src/spdx/parser/tagvalue/__init__.py
diff --git a/src/spdx/parser/tagvalue/helper_methods.py b/src/spdx/parser/tagvalue/helper_methods.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2023 spdx contributors
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Optional, Callable, Any, Dict
+
+from ply.yacc import YaccProduction
+
+from spdx.casing_tools import camel_case_to_snake_case
+from spdx.model.annotation import Annotation
+from spdx.model.checksum import Checksum, ChecksumAlgorithm
+from spdx.model.document import CreationInfo
+from spdx.model.extracted_licensing_info import ExtractedLicensingInfo
+from spdx.model.file import File
+from spdx.model.package import Package
+from spdx.model.snippet import Snippet
+from spdx.parser.error import SPDXParsingError
+
+
+def grammar_rule(doc):
+    # Decorator factory that lets a parsing method state its grammar rule as an argument instead of a docstring.
+    # `doc` is installed as the method's __doc__ (presumably where ply.yacc looks for the rule; see the PLY docs).
+    def attach_rule(parsing_method):
+        setattr(parsing_method, "__doc__", doc)
+        return parsing_method  # the very same function object, not a wrapper
+    return attach_rule
+
+
+def str_from_text(text: Optional[str]) -> Optional[str]:
+    # Returns the content of a leading <text>...</text> wrapper, the unchanged string if there is no such
+    # wrapper, and None for any input that is not a str.
+    if not isinstance(text, str):
+        # Must be checked before matching: regex.match(None) raises TypeError, so None never reached a fallback.
+        return None
+    regex = re.compile("<text>((.|\n)+)</text>", re.UNICODE)
+    match = regex.match(text)
+    return match.group(1) if match else text
+
+
+def parse_checksum(checksum_str: str) -> Checksum:
+    # Builds a Checksum from "<algorithm>:<value>". The lexer's regexes for the CHECKSUM and EXT_DOC_REF_CHECKSUM
+    # tokens are relied upon to guarantee that shape, so the unpacking and the enum lookup below can't fail.
+    algorithm_name, raw_value = checksum_str.split(":")
+    # Normalise the spelling to an enum member name: upper case, dashes replaced by underscores.
+    member_name = algorithm_name.upper().replace("-", "_")
+    parsed_algorithm = ChecksumAlgorithm[member_name]
+    return Checksum(parsed_algorithm, raw_value.strip())
+
+
+def set_value(parsed_value: YaccProduction, dict_to_fill: Dict[str, Any], argument_name: Optional[str] = None,
+              method_to_apply: Callable = lambda x: x):
+    # Stores method_to_apply(parsed_value[2]) in dict_to_fill; problems are appended to dict_to_fill["logger"].
+    argument_name = argument_name or get_property_name(parsed_value[1])  # key falls back to the tag's mapping
+    if argument_name not in dict_to_fill:
+        try:
+            dict_to_fill[argument_name] = method_to_apply(parsed_value[2])
+        except SPDXParsingError as parsing_error:
+            dict_to_fill["logger"].append(parsing_error.get_messages())
+        except ValueError as value_error:
+            dict_to_fill["logger"].append(value_error.args[0])
+        except KeyError:
+            dict_to_fill["logger"].append(
+                f"Invalid {parsed_value[1]}: {parsed_value[2]}. Line: {parsed_value.lineno(1)}")
+        return
+    dict_to_fill["logger"].append(f"Multiple values for {parsed_value[1]} found. Line: {parsed_value.lineno(1)}")
+
+
+def get_property_name(tag: str):
+    # Field name for a tag: the explicit mapping if the tag is listed, else the tag converted to snake_case.
+    mapped = TAG_DATA_MODEL_FIELD.get(tag)
+    return camel_case_to_snake_case(tag) if mapped is None else mapped[1]
+
+
+# Maps a tag-value tag to (data model class, name of the field on that class). The table is intentionally
+# partial: it lists only tags that a generic parsing method can handle, not the ones that need individual logic.
+TAG_DATA_MODEL_FIELD = {
+    tag: (model_class, field_name)
+    for model_class, field_by_tag in (
+        (CreationInfo, {
+            "SPDXVersion": "spdx_version", "DataLicense": "data_license", "DocumentName": "name",
+            "DocumentComment": "document_comment", "DocumentNamespace": "document_namespace", "Creator": "creator",
+            "Created": "created", "CreatorComment": "creator_comment", "LicenseListVersion": "license_list_version",
+            "ExternalDocumentRef": "external_document_refs"}),
+        (File, {
+            "FileName": "name", "FileType": "file_type", "FileChecksum": "checksums", "FileNotice": "notice",
+            "FileCopyrightText": "copyright_text", "LicenseComments": "license_comment", "FileComment": "comment",
+            "LicenseConcluded": "license_concluded", "LicenseDeclared": "license_declared"}),
+        (Package, {
+            "PackageName": "name", "PackageComment": "comment", "PackageCopyrightText": "copyright_text",
+            "PackageLicenseComments": "license_comment", "PackageLicenseDeclared": "license_declared",
+            "PackageLicenseConcluded": "license_concluded", "PackageFileName": "file_name",
+            "PackageVersion": "version", "PackageDownloadLocation": "download_location", "PackageSummary": "summary",
+            "PackageSourceInfo": "source_info", "PackageSupplier": "supplier", "PackageOriginator": "originator",
+            "PackageDescription": "description", "PackageHomePage": "homepage"}),
+        (Snippet, {
+            "SnippetSPDXID": "spdx_id", "SnippetFromFileSPDXID": "file_spdx_id", "SnippetName": "name",
+            "SnippetComment": "comment", "SnippetCopyrightText": "copyright_text",
+            "SnippetLicenseComments": "license_comment", "SnippetLicenseConcluded": "license_concluded",
+            "SnippetByteRange": "byte_range", "SnippetLineRange": "line_range"}),
+        (Annotation, {"Annotator": "annotator", "SPDXREF": "spdx_id", "AnnotationComment": "annotation_comment"}),
+        (ExtractedLicensingInfo, {"LicenseID": "license_id", "ExtractedText": "extracted_text",
+                                  "LicenseComment": "comment", "LicenseName": "license_name"}),
+    )
+    for tag, field_name in field_by_tag.items()
+}