Skip to content

Commit

Permalink
Add md5sums file list to distroless container (#2065)
Browse files Browse the repository at this point in the history
* Add md5sums file list to distroless container

This ensures that a "distroless" container layer tarball built from
Debian packages contains not only the control file of each package, but
also the md5sums file that lists original files included in a package.
If present, we extract the md5sums file and save it side-by-side with
the package control file under this path:
  var/lib/dpkg/status.d/<package-name>.md5sums

Reference: #1876
Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>

* Remove trailing whitespace

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
  • Loading branch information
pombredanne authored May 30, 2022
1 parent cc315a1 commit 558ae6f
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 9 deletions.
33 changes: 25 additions & 8 deletions container/build_tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class DebError(Exception):
PKG_NAME_RE = re.compile(r'Package:\s*(?P<pkg_name>\w+).*')
DPKG_STATUS_DIR = '/var/lib/dpkg/status.d'
PKG_METADATA_FILE = 'control'
PKG_MD5SUMS_FILE = 'md5sums'

@staticmethod
def parse_pkg_name(metadata, filename):
Expand Down Expand Up @@ -222,20 +223,36 @@ def write_temp_file(self, data, suffix='tar', mode='wb'):
os.remove(tmpfile)

def add_pkg_metadata(self, metadata_tar, deb):
    """
    Extract the package ``control`` metadata file from a Debian `metadata_tar`
    tarball file to the status.d directory. Also extract the ``md5sums`` files
    list file if present.

    The control file is added to this archive as
    ``<DPKG_STATUS_DIR>/<package-name>`` and the md5sums file, when present,
    as ``<DPKG_STATUS_DIR>/<package-name>.md5sums``.

    Args:
      metadata_tar: path to the tarball holding the package metadata files.
      deb: name of the .deb file being processed; used in error messages and
        passed to ``parse_pkg_name`` together with the control file content.

    Raises:
      DebError: if the tarball has no readable ``control`` file, if the
        metadata is invalid, or if any other error occurs while processing.
    """
    def find_member(members, basename):
        # Return the first tar member whose base name matches, or None.
        for member in members:
            if os.path.basename(member.name) == basename:
                return member
        return None

    try:
        with tarfile.open(metadata_tar) as tar:
            tar_members = tar.getmembers()

            # Metadata is expected to be in a file.
            control_member = find_member(tar_members, TarFile.PKG_METADATA_FILE)
            # extractfile() returns None for members that are not regular
            # files or links, so treat that the same as a missing file.
            control_file = None
            if control_member is not None:
                control_file = tar.extractfile(control_member)
            if control_file is None:
                raise self.DebError(deb + ' does not contain a control Metadata File!')

            metadata = control_file.read()
            pkg_name = TarFile.parse_pkg_name(metadata.decode('utf-8'), deb)
            destination_file = os.path.join(TarFile.DPKG_STATUS_DIR, pkg_name)
            with self.write_temp_file(data=metadata) as metadata_file:
                self.add_file(metadata_file, destination_file)

            # Extract the md5sums file listing of package files if present
            md5sums_member = find_member(tar_members, TarFile.PKG_MD5SUMS_FILE)
            md5sums_file = None
            if md5sums_member is not None:
                md5sums_file = tar.extractfile(md5sums_member)
            if md5sums_file is not None:
                md5sums = md5sums_file.read()
                destination_file = os.path.join(
                    TarFile.DPKG_STATUS_DIR, '{0}.md5sums'.format(pkg_name))
                with self.write_temp_file(data=md5sums) as files_list:
                    self.add_file(files_list, destination_file)

    except self.DebError:
        # Already a precise error raised above; without this clause the
        # generic handler below would re-wrap it as an "Unknown Exception".
        raise
    except (KeyError, TypeError) as e:
        raise self.DebError(deb + ' contains invalid Metadata! Exception {0}'.format(e))
    except Exception as e:
        raise self.DebError('Unknown Exception {0}. Please report an issue at'
                            ' github.com/bazelbuild/rules_docker.'.format(e))
Expand Down Expand Up @@ -473,9 +490,9 @@ def validate_link(l):
parser.add_argument('--xz_path', type=str,
help='Specify the path to xz as a fallback when the Python '
'lzma module is unavailable.')

parser.add_argument('--force_posixpath', type=bool, default=False,
help='Force the use of posixpath when normalizing file paths. This is useful'
help='Force the use of posixpath when normalizing file paths. This is useful'
'when building in a non-posix environment.')

main(parser.parse_args())
34 changes: 33 additions & 1 deletion tests/container/build_tar_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,44 @@ def testPkgMetadataStatusFileName(self):

self.assertIn('./var/lib/dpkg/status.d/test', contained_names)


def testPackageNameParserInvalidMetadata(self):
    # The metadata has no valid "Package:" field, so the expected name is the
    # one derived from the .deb file name.
    invalid_metadata = "Package Name: Invalid"
    parsed_name = TarFile.parse_pkg_name(
        invalid_metadata, "some/path/test-invalid-pkg.deb")
    self.assertEqual('test-invalid-pkg', parsed_name)

def testPkgMetadataMd5sumsFileName(self):
    # Checks that add_pkg_metadata stores both the control file and the
    # md5sums file list under status.d, named after the package.
    control_text = """Package: test
Description: Dummy
Version: 1.2.4
"""
    md5sums_text = """4006d28dbf6dfbe2c0fe695839e64cb3 usr/lib/python3/dist-packages/docutils/languages/cs.py
"""
    staged_files = (("control", control_text), ("md5sums", md5sums_text))

    with tempfile.TemporaryDirectory() as workdir:
        # Write the control and md5sums files to disk.
        for member_name, text in staged_files:
            with open(path.join(workdir, member_name), "w") as handle:
                handle.write(text)

        # Pack both files into a metadata tar under their bare names.
        metadata_tar_path = path.join(workdir, "metadata.tar")
        with tarfile.open(metadata_tar_path, "w") as packed:
            for member_name, _ in staged_files:
                packed.add(path.join(workdir, member_name), arcname=member_name)

        # Build the output layer from the metadata tar.
        layer_path = path.join(workdir, "output.tar")
        layer_options = dict(
            directory="/",
            compression=None,
            root_directory="./",
            default_mtime=None,
            enable_mtime_preservation=False,
            xz_path="",
            force_posixpath=False,
        )
        with TarFile(layer_path, **layer_options) as layer:
            layer.add_pkg_metadata(metadata_tar_path, "ignored.deb")

        with tarfile.open(layer_path) as written_layer:
            names_in_layer = written_layer.getnames()

        self.assertIn('./var/lib/dpkg/status.d/test', names_in_layer)
        self.assertIn('./var/lib/dpkg/status.d/test.md5sums', names_in_layer)


# Run the unittest test runner when this file is executed as a script.
if __name__ == '__main__':
unittest.main()

0 comments on commit 558ae6f

Please sign in to comment.