Skip to content

Commit

Permalink
Merge pull request #188 from tardyp/sbom
Browse files Browse the repository at this point in the history
Add spdxlite 2.2 SBOM parsing support

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
  • Loading branch information
pombredanne authored Aug 24, 2021
2 parents 794ddf1 + 7b93955 commit 21ea183
Show file tree
Hide file tree
Showing 17 changed files with 658 additions and 357 deletions.
14 changes: 10 additions & 4 deletions spdx/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ def __init__(
self.verif_exc_files = []
self.pkg_ext_refs = []

@property
def are_files_analyzed(self):
    """
    Tell whether this package's files count as analyzed.

    Return False only when files_analyzed is the literal False; both True
    and None (field omitted) return True.
    """
    # Identity test on purpose: an omitted value (None) must count as
    # analyzed, which a plain truthiness test would get wrong.
    # For True/False/None this is the same as
    # `self.files_analyzed or self.files_analyzed is None`.
    return self.files_analyzed is not False

def add_file(self, fil):
    """Append `fil` to the end of this object's `files` list."""
    files = self.files
    files.append(fil)

Expand Down Expand Up @@ -132,7 +138,7 @@ def validate_files_analyzed(self, messages):
messages.append(
'Package files_analyzed must be True or False or None (omitted)'
)
if self.files_analyzed is False and self.verif_code is not None:
if not self.are_files_analyzed and self.verif_code is not None:
messages.append(
'Package verif_code must be None (omitted) when files_analyzed is False'
)
Expand Down Expand Up @@ -200,13 +206,13 @@ def validate_mandatory_fields(self, messages):
"spdx.document.License"
)

if not self.licenses_from_files:
if not self.licenses_from_files and self.are_files_analyzed:
messages.append("Package licenses_from_files can not be empty")

return messages

def validate_files(self, messages):
if self.files_analyzed != False:
if self.are_files_analyzed:
if not self.files:
messages.append(
"Package must have at least one file."
Expand Down Expand Up @@ -240,7 +246,7 @@ def validate_mandatory_str_fields(self, messages):
docstring must be of a type that provides __str__ method.
"""
FIELDS = ["name", "spdx_id", "download_location", "cr_text"]
if self.files_analyzed != False:
if self.are_files_analyzed:
FIELDS = FIELDS + ["verif_code"]
self.validate_str_fields(FIELDS, False, messages)

Expand Down
2 changes: 1 addition & 1 deletion spdx/parsers/jsonparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ def __init__(self, builder, logger):
super(Parser, self).__init__(builder, logger)

def parse(self, file):
    """
    Parse an SPDX document from a JSON stream
    - file: open file-like object containing the JSON text
    Return the result of the base class parse().
    """
    # Decode the stream exactly once: json.load() consumes the file, so a
    # second load on the same object would fail. The decoded data is handed
    # to json_yaml_set_document, which stores the document object to parse.
    self.json_yaml_set_document(json.load(file))
    return super(Parser, self).parse()
68 changes: 68 additions & 0 deletions spdx/parsers/jsonyamlxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,14 +1035,22 @@ class PackageParser(BaseParser):
def __init__(self, builder, logger):
    """
    Set up the package parser
    - builder, logger: passed unchanged to the base class initializer
    """
    base = super(PackageParser, self)
    base.__init__(builder, logger)

@property
def package(self):
    """
    Return the package currently being parsed, i.e. the last entry of
    the document's package list.
    """
    packages = self.document.packages
    return packages[-1]

def parse_package(self, package):
"""
Parse Package Information fields
- package: Python dict with Package Information fields in it
"""
if isinstance(package, dict):
# The builder has the notion of current package, here, we force to start a new one
self.builder.reset_package()
self.parse_pkg_name(package.get("name"))
self.parse_pkg_id(package.get("SPDXID"))
self.parse_pkg_files_analyzed(package.get("filesAnalyzed"))
self.parse_pkg_version(package.get("versionInfo"))
self.parse_pkg_file_name(package.get("packageFileName"))
self.parse_pkg_supplier(package.get("supplier"))
Expand Down Expand Up @@ -1176,11 +1184,34 @@ def parse_pkg_down_location(self, pkg_down_location):
else:
self.value_error("PKG_DOWN_LOC", pkg_down_location)

def parse_pkg_files_analyzed(self, pkg_files_analyzed):
    """
    Parse Package files analyzed
    - pkg_files_analyzed: Python boolean, or one of the strings
      "True", "true", "False", "false"; None means the field was omitted
    Return the builder's result on success, None otherwise.
    """
    # Files Analyzed is optional: nothing to record when it is omitted.
    if pkg_files_analyzed is None:
        return None

    # Besides real booleans, accept the textual spellings, for input
    # formats that deliver the value as text rather than as a boolean.
    # NOTE(review): assumes the builder converts these strings to booleans,
    # as the tag/value builder's set_pkg_files_analyzed does - confirm for
    # the builder used with this parser.
    is_bool = isinstance(pkg_files_analyzed, bool)
    is_bool_text = isinstance(pkg_files_analyzed, str) and pkg_files_analyzed in (
        "True",
        "true",
        "False",
        "false",
    )
    if not (is_bool or is_bool_text):
        self.value_error("PKG_FILES_ANALYZED", pkg_files_analyzed)
        return None

    try:
        return self.builder.set_pkg_files_analyzed(
            self.document, pkg_files_analyzed
        )
    except CardinalityError:
        # The field was already set for the current package.
        self.more_than_one_error("PKG_FILES_ANALYZED")
        return None

def parse_pkg_verif_code_field(self, pkg_verif_code_field):
"""
Parse Package verification code dict
- pkg_verif_code_field: Python dict('value':str/unicode, 'excludedFilesNames':list)
"""
if not self.package.are_files_analyzed:
if pkg_verif_code_field is not None:
self.value_error("PKG_VERIF_CODE_FIELD", pkg_verif_code_field)
return

if isinstance(pkg_verif_code_field, dict):
self.parse_pkg_verif_exc_files(
pkg_verif_code_field.get("packageVerificationCodeExcludedFiles")
Expand All @@ -1194,6 +1225,11 @@ def parse_pkg_verif_code(self, pkg_verif_code):
Parse Package verification code value
- pkg_verif_code: Python str/unicode
"""
if not self.package.are_files_analyzed:
if pkg_verif_code is not None:
self.value_error("PKG_VERIF_CODE", pkg_verif_code)
return

if isinstance(pkg_verif_code, str):
try:
return self.builder.set_pkg_verif_code(self.document, pkg_verif_code)
Expand Down Expand Up @@ -1284,6 +1320,10 @@ def parse_pkg_license_info_from_files(self, license_info_from_files):
Parse Package license information from files
- license_info_from_files: Python list of licenses information from files (str/unicode)
"""
if not self.package.are_files_analyzed:
if license_info_from_files is not None:
self.value_error("PKG_LIC_FRM_FILES", license_info_from_files)
return
if isinstance(license_info_from_files, list):
for license_info_from_file in license_info_from_files:
if isinstance(license_info_from_file, str):
Expand Down Expand Up @@ -1416,6 +1456,11 @@ def parse_pkg_files(self, pkg_files):
Parse Package files
- pkg_files: Python list of dicts as in FileParser.parse_file
"""
if not self.package.are_files_analyzed:
if pkg_files is not None:
self.value_error("PKG_FILES", pkg_files)
return

if isinstance(pkg_files, list):
for pkg_file in pkg_files:
if isinstance(pkg_file, dict):
Expand Down Expand Up @@ -1455,6 +1500,13 @@ class Parser(
def __init__(self, builder, logger):
    """
    Set up the document parser
    - builder, logger: passed unchanged to the base class initializer
    """
    base = super(Parser, self)
    base.__init__(builder, logger)

def json_yaml_set_document(self, data):
    """
    Select the document object out of the decoded JSON/YAML data
    - data: mapping produced by the JSON or YAML loader
    When `data` has a truthy "spdxVersion" key it is used as the document
    itself; otherwise the value under its "Document" key is used.
    """
    # No check that spdxVersion >= 2.2 is made here: parsing stays lenient.
    is_top_level_document = bool(data.get("spdxVersion"))
    self.document_object = data if is_top_level_document else data.get("Document")

def parse(self):
"""
Parse Document Information fields
Expand Down Expand Up @@ -1484,6 +1536,8 @@ def parse(self):
self.parse_reviews(self.document_object.get("reviewers"))
self.parse_snippets(self.document_object.get("snippets"))

self.parse_packages(self.document_object.get("packages"))

self.parse_doc_described_objects(self.document_object.get("documentDescribes"))

validation_messages = ErrorMessages()
Expand Down Expand Up @@ -1605,3 +1659,17 @@ def parse_doc_described_objects(self, doc_described_objects):
return True
else:
self.value_error("DOC_DESCRIBES", doc_described_objects)


def parse_packages(self, packages):
    """
    Parse the document-level packages list (SPDXLite layout)
    - packages: Python list of package dicts, or None when absent
    Return True when a list was processed, None otherwise.
    """
    # The list is optional.
    if packages is None:
        return None
    # Anything other than a list is reported as a value error.
    if not isinstance(packages, list):
        self.value_error("PACKAGES", packages)
        return None
    for entry in packages:
        self.parse_package(entry)
    return True
2 changes: 1 addition & 1 deletion spdx/parsers/parse_anything.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def parse_file(fn):
parsing_module = jsonparser
elif fn.endswith(".xml"):
parsing_module = xmlparser
elif fn.endswith(".yaml"):
elif fn.endswith(".yaml") or fn.endswith(".yml"):
parsing_module = yamlparser
else:
raise FileTypeError("FileType Not Supported" + str(fn))
Expand Down
6 changes: 4 additions & 2 deletions spdx/parsers/tagvaluebuilders.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,10 +712,12 @@ def set_pkg_files_analyzed(self, doc, files_analyzed):
"""
self.assert_package_exists()
if not self.package_files_analyzed_set:
if files_analyzed:
if files_analyzed is not None:
if validations.validate_pkg_files_analyzed(files_analyzed):
self.package_files_analyzed_set = True
doc.packages[-1].files_analyzed = (files_analyzed.lower() == "true")
if isinstance(files_analyzed, str):
files_analyzed = files_analyzed.lower() == "true"
doc.packages[-1].files_analyzed = files_analyzed
# convert to boolean;
# validate_pkg_files_analyzed already checked if
# files_analyzed is in ['True', 'true', 'False', 'false']
Expand Down
2 changes: 1 addition & 1 deletion spdx/parsers/validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def validate_pkg_spdx_id(value, optional=False):


def validate_pkg_files_analyzed(value, optional=False):
    """
    Check a "files analyzed" value.

    Return True for a real boolean or for one of the strings "True",
    "true", "False", "false"; for anything else return `optional`.
    """
    # Test the boolean case with isinstance rather than by membership in a
    # list holding True/False: since 1 == True and 0 == False in Python,
    # membership would also let the integers 0 and 1 (and 0.0/1.0) through.
    if isinstance(value, bool):
        return True
    if value in ("True", "true", "False", "false"):
        return True
    return optional
Expand Down
2 changes: 1 addition & 1 deletion spdx/parsers/yamlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ def __init__(self, builder, logger):
super(Parser, self).__init__(builder, logger)

def parse(self, file):
    """
    Parse an SPDX document from a YAML stream
    - file: open file-like object containing the YAML text
    Return the result of the base class parse().
    """
    # Load the stream exactly once; a second load on the same file object
    # would see an already-consumed stream. The loaded data is handed to
    # json_yaml_set_document, which stores the document object to parse.
    self.json_yaml_set_document(yaml.safe_load(file))
    return super(Parser, self).parse()
12 changes: 6 additions & 6 deletions spdx/writers/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,12 +798,12 @@ def create_package_node(self, package):
# Handle package verification
if package.files_analyzed != False:
verif_node = self.package_verif_node(package)
verif_triple = (
package_node,
self.spdx_namespace.packageVerificationCode,
verif_node,
)
self.graph.add(verif_triple)
verif_triple = (
package_node,
self.spdx_namespace.packageVerificationCode,
verif_node,
)
self.graph.add(verif_triple)
# Handle concluded license
conc_lic_node = self.license_or_special(package.conc_lics)
conc_lic_triple = (
Expand Down
144 changes: 144 additions & 0 deletions tests/data/doc_parse/SBOMexpected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
{
"id": "SPDXRef-DOCUMENT",
"specVersion": {
"major": 2,
"minor": 2
},
"documentNamespace": "http://spdx.org/spdxdocs/spdx-document-xyz",
"name": "xyz-0.1.0",
"comment": null,
"dataLicense": {
"type": "Single",
"identifier": "CC0-1.0",
"name": "Creative Commons Zero v1.0 Universal"
},
"licenseListVersion": {
"major": 3,
"minor": 9
},
"creators": [
{
"name": "Example Inc.",
"email": null,
"type": "Organization"
},
{
"name": "Thomas Steenbergen",
"email": null,
"type": "Person"
}
],
"created": "2020-07-23T18:30:22Z",
"creatorComment": null,
"packages": [
{
"id": "SPDXRef-Package-xyz",
"name": "xyz",
"packageFileName": null,
"summary": "Awesome product created by Example Inc.",
"description": null,
"versionInfo": "0.1.0",
"sourceInfo": null,
"downloadLocation": "git+ssh://gitlab.example.com:3389/products/xyz.git@b2c358080011af6a366d2512a25a379fbe7b1f78",
"homepage": "https://example.com/products/xyz",
"originator": null,
"supplier": null,
"licenseConcluded": {
"type": "Single",
"identifier": "NOASSERTION",
"name": "NOASSERTION"
},
"licenseDeclared": {
"type": "Conjunction",
"identifier": [
"Apache-2.0",
"LicenseRef-Proprietary-ExampleInc",
"curl"
],
"name": [
"Apache License 2.0",
"LicenseRef-Proprietary-ExampleInc",
"curl License"
]
},
"copyrightText": "copyright 2004-2020 Example Inc. All Rights Reserved.",
"licenseComment": null,
"checksum": null,
"files": [],
"licenseInfoFromFiles": [],
"verificationCode": {
"value": null,
"excludedFilesNames": []
}
},
{
"id": "SPDXRef-Package-curl",
"name": "curl",
"packageFileName": "./libs/curl",
"summary": null,
"description": "A command line tool and library for transferring data with URL syntax, supporting HTTP, HTTPS, FTP, FTPS, GOPHER, TFTP, SCP, SFTP, SMB, TELNET, DICT, LDAP, LDAPS, MQTT, FILE, IMAP, SMTP, POP3, RTSP and RTMP. libcurl offers a myriad of powerful features.",
"versionInfo": "7.70.0",
"sourceInfo": null,
"downloadLocation": "https://github.com/curl/curl/releases/download/curl-7_70_0/curl-7.70.0.tar.gz",
"homepage": "https://curl.haxx.se/",
"originator": null,
"supplier": null,
"licenseConcluded": {
"type": "Single",
"identifier": "NOASSERTION",
"name": "NOASSERTION"
},
"licenseDeclared": {
"type": "Single",
"identifier": "curl",
"name": "curl License"
},
"copyrightText": "Copyright (c) 1996 - 2020, Daniel Stenberg, <daniel@haxx.se>, and many contributors, see the THANKS file.",
"licenseComment": null,
"checksum": null,
"files": [],
"licenseInfoFromFiles": [],
"verificationCode": {
"value": null,
"excludedFilesNames": []
}
},
{
"id": "SPDXRef-Package-openssl",
"name": "openssl",
"packageFileName": "./libs/openssl",
"summary": null,
"description": "OpenSSL is a robust, commercial-grade, full-featured Open Source Toolkit for the Transport Layer Security (TLS) protocol formerly known as the Secure Sockets Layer (SSL) protocol. The protocol implementation is based on a full-strength general purpose cryptographic library, which can also be used stand-alone.",
"versionInfo": "1.1.1g",
"sourceInfo": null,
"downloadLocation": "git+ssh://github.com/openssl/openssl.git@e2e09d9fba1187f8d6aafaa34d4172f56f1ffb72",
"homepage": "https://www.openssl.org/",
"originator": null,
"supplier": null,
"licenseConcluded": {
"type": "Single",
"identifier": "NOASSERTION",
"name": "NOASSERTION"
},
"licenseDeclared": {
"type": "Single",
"identifier": "Apache-2.0",
"name": "Apache License 2.0"
},
"copyrightText": "copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.",
"licenseComment": null,
"checksum": null,
"files": [],
"licenseInfoFromFiles": [],
"verificationCode": {
"value": null,
"excludedFilesNames": []
}
}
],
"externalDocumentRefs": [],
"extractedLicenses": [],
"annotations": [],
"reviews": [],
"snippets": []
}
Loading

0 comments on commit 21ea183

Please sign in to comment.