-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcontainerscan.py
executable file
·389 lines (326 loc) · 16.1 KB
/
containerscan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
#!/usr/bin/env python
import subprocess
import os
import sys
import datetime
import yaml
import git
import shutil
import tempfile
import pdb
import re
import errno
import tarfile
import tempfile
class Scanner():
    """Scan the layers of Docker images with ClamAV and write a YAML report.

    Typical call order (as used by the script entry point of this file):
    getEnvironmentVariables, createTempDirectories, dockerLogin,
    getPreviouslyScannedLayers, getImagesToScan, scanImages,
    determineUniqueImages, generateReport.

    Attributes set by __init__:
        clamav_version: version token parsed from ``clamscan --version``.
        clamav_definitions: version token parsed from ``sigtool --info``.
        layerList: dict mapping layer id -> number of times it was seen.
        scannedLayers: list of layer ids that do not need to be scanned again.
        yaml_report: dict that is dumped by generateReport.
    """

    def __init__(self):
        """Initialize the Scanner.

        Runs ``clamscan --version`` and ``sigtool --info`` on
        /var/lib/clamav/main.cvd to record which engine and definitions are
        in use, then prepares the empty bookkeeping structures and report.
        """
        clam_version = self._toText(
            subprocess.check_output(['clamscan', '--version']))
        # The version is the second token when splitting on whitespace or
        # '/'.
        self.clamav_version = re.split(r'[\s/]', clam_version)[1]

        # Read sigtool's output directly (instead of piping it through grep)
        # so the child process is always reaped.
        proc = subprocess.Popen(['sigtool', '--info=/var/lib/clamav/main.cvd'],
                                stdout=subprocess.PIPE)
        sigtool_info = self._toText(proc.communicate()[0])
        version_lines = [line for line in sigtool_info.splitlines()
                         if line.startswith('Version')]
        # Second whitespace-separated token of the "Version..." line(s).
        self.clamav_definitions = ' '.join(version_lines).split()[1]

        self.layerList = {}
        self.scannedLayers = []
        self.yaml_report = {
            'clamAV_version': self.clamav_version,
            'clamAV_definitions': self.clamav_definitions,
            'beginTime': datetime.datetime.utcnow(),
            'images': []
        }

    @staticmethod
    def _toText(raw):
        """Return subprocess output as a native ``str``.

        On Python 3 subprocess output is ``bytes`` and must be decoded; on
        Python 2 it already is ``str`` and is returned unchanged.
        """
        if isinstance(raw, str):
            return raw
        return raw.decode('utf-8', 'replace')

    @staticmethod
    def _requireEnv(name, error_message):
        """Return environment variable `name`, or print and exit(1) if unset."""
        value = os.environ.get(name)
        if value is None:
            print(error_message)
            # Non-zero status so a calling shell/CI job sees the failure.
            sys.exit(1)
        return value

    def getEnvironmentVariables(self):
        """Populate variables from Linux environment variables.

        Exits the process with status 1 when a required variable is missing.

        Required Variables:
            dockerServer    (docker server for docker login ex: docker-server.domain.com)
            gerritUsername  (username used to access code base)
            dockerUsername  (username used in docker login command)
            dockerPassword  (password used in docker login command)
            reportDirectory (full path where script should place reports)
            tempDirectory   (full path where script should place temp files)
            needCleanup     (True - remove temp files after each step, False - do not remove any files)
            scanType        (singleImage - scans a single image, imageList - scans a list of images)
        Optional Variables:
            previouslyScannedFile (full path to a previous report)
            imageToScan (requires scanType=singleImage, name of single image to scan)
            repoToScan  (requires scanType=imageList, pull list of images from repo, ex: gerrit-server.domain.com:port/repo.git)
            fileToScan  (requires scanType=imageList, file that contains list of images in repo, ex: path/to/images_list.yaml)
        """
        # Optional: stays None when not provided.
        self.previouslyScannedFile = os.environ.get('previouslyScannedFile')

        self.dockerServer = self._requireEnv(
            'dockerServer', "Docker Server is not specified.")
        self.gerritUser = self._requireEnv(
            'gerritUsername', "Gerrit Username is not specified.")
        self.dockerUser = self._requireEnv(
            'dockerUsername', "Docker Username is not specified.")
        self.dockerPassword = self._requireEnv(
            'dockerPassword', "Docker Password is not specified.")
        self.reportDirectory = self._requireEnv(
            'reportDirectory', "Report Directory is not specified.")
        self.tempDirectory = self._requireEnv(
            'tempDirectory', "Temporary Directory is not specified.")
        # Kept as the raw string; removeTempFiles compares it to "True".
        self.needCleanup = self._requireEnv(
            'needCleanup', "Cleanup mode is not specified.")

        scan_type = os.environ.get('scanType')
        if scan_type == 'singleImage':
            self.imageToScan = self._requireEnv(
                'imageToScan',
                "scanType is singleImage but imageToScan is not specified")
            self.scanType = scan_type
        elif scan_type == 'imageList':
            self.repoToScan = self._requireEnv(
                'repoToScan',
                "scanType is set to imageList but repoToScan is not specified")
            self.scanType = scan_type
            self.fileToScan = self._requireEnv(
                'fileToScan',
                "scanType is set to imageList but fileToScan is not specified")
        else:
            print("A valid scanType is not specified. Expect imageList or singleImage")
            sys.exit(1)

    def dockerLogin(self):
        """Execute docker login command; exit(1) if it cannot run or fails."""
        # NOTE(review): passing the password via --password exposes it in the
        # process list; consider `docker login --password-stdin` where the
        # installed docker client supports it.
        try:
            status = subprocess.call(
                ['docker', 'login', '--username', self.dockerUser,
                 '--password', self.dockerPassword, self.dockerServer])
        except OSError as e:
            # Raised when the docker executable cannot be started.
            print(e)
            sys.exit(1)
        # subprocess.call does not raise on a failed login, so check the
        # exit status explicitly.
        if status != 0:
            print("docker login failed with exit status %s" % status)
            sys.exit(1)

    @staticmethod
    def _makeDirs(path):
        """Create `path` (and parents); an already existing path is fine."""
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    def createTempDirectories(self):
        """Create temporary directories for reports and tarballs"""
        self._makeDirs(self.reportDirectory)
        self._makeDirs(self.tempDirectory)

    def getPreviouslyScannedLayers(self):
        """Get list of layers scanned in a previous report.

        Determine if ClamAV version and ClamAV virus definitions have changed.
        If they have, run a full scan. If they have not, parse the previously
        generated report for a list of layers. Do not scan those layers during
        this scan if the layer and virus information are both unchanged.

        Does nothing when no previous report was configured. Exits with
        status 1 when the previous report is not valid YAML.
        """
        if not self.previouslyScannedFile:
            return

        with open(self.previouslyScannedFile, 'r') as stream:
            try:
                previous = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                print(exc)
                sys.exit(1)

        if (self.clamav_version != previous['clamAV_version'] or
                self.clamav_definitions != previous['clamAV_definitions']):
            print("Running full scan as ClamAV version or definitions have changed.")
            return

        print("ClamAV version and definitions have not changed. Ignoring previously scanned images")
        # Set used only for fast membership tests; scannedLayers stays a list.
        known = set(self.scannedLayers)
        for image_entry in previous['images']:
            # Each entry is a single-key dict: {image name: {...}}.
            image_name = next(iter(image_entry))
            for layerID in image_entry[image_name]['layers']:
                if layerID not in known:
                    known.add(layerID)
                    self.scannedLayers.append(layerID)

    def _expandDomains(self, image_name):
        """Replace DOCKER_DOMAIN / DOCKER_OPEN_DOMAIN placeholders.

        The replacement values come from the environment variables
        dockerDomain and dockerOpenDomain. A variable is only required when
        its placeholder actually occurs in `image_name`; in that case a
        missing variable ends the process with status 1.
        """
        placeholders = (("DOCKER_DOMAIN", 'dockerDomain'),
                        ("DOCKER_OPEN_DOMAIN", 'dockerOpenDomain'))
        for placeholder, env_name in placeholders:
            if placeholder in image_name:
                value = self._requireEnv(
                    env_name,
                    "%s is used in the image list but %s is not specified"
                    % (placeholder, env_name))
                image_name = image_name.replace(placeholder, value)
        return image_name

    def getImagesToScan(self):
        """Determine which image(s) need to be scanned.

        If scanning a list of images, clone the repo containing that list of
        images. Parse the specific file containing the images to build a list
        of image names (read from data -> images_refs -> images).

        If scanning a single image create a list with the correct image name.

        The result is stored in self.imageList for the scanner to loop over.
        """
        self.imageList = []
        if self.scanType != 'imageList':
            self.imageList.append(self.imageToScan)
            return

        cloneLink = 'ssh://%s@%s' % (self.gerritUser, self.repoToScan)
        tempGitDirectory = tempfile.mkdtemp()
        try:
            git.Repo.clone_from(cloneLink, tempGitDirectory, branch='master',
                                depth=1)
            imagesFilePath = os.path.join(tempGitDirectory, self.fileToScan)
            with open(imagesFilePath, 'r') as stream:
                try:
                    images_doc = yaml.safe_load(stream)
                except yaml.YAMLError as exc:
                    print(exc)
                    sys.exit(1)
            for imageName in images_doc['data']['images_refs']['images'].values():
                if "DOCKER_" in imageName:
                    imageName = self._expandDomains(imageName)
                self.imageList.append(imageName)
        finally:
            # Remove the clone even when parsing fails or the process exits.
            shutil.rmtree(tempGitDirectory)

    def saveImageAsTar(self, image_name):
        """Perform docker pull and docker save (as tar) commands.

        The tarball is written to <tempDirectory>/<name>.tar where <name> is
        the last '/'-separated component of the image reference with ':' and
        '@' replaced by '.'. The full image reference (including registry and
        repository path) is what gets pulled and saved.

        If encountering errors at this step, ensure docker login credentials are
        correct, and the user has appropriate permissions.

        Returns the path of the tarball.
        """
        # Only the file name is shortened; `image_name` itself must stay
        # intact so docker pulls from the intended registry.
        file_stem = image_name.split('/')[-1].replace(':', '.').replace('@', '.')
        tar_path = '%s/%s.tar' % (self.tempDirectory, file_stem)
        # Binary mode: the tar stream is not text. The handle is closed as
        # soon as docker save has finished writing.
        with open(tar_path, 'wb') as tarball:
            subprocess.call(['docker', 'pull', image_name])
            subprocess.call(['docker', 'save', image_name], stdout=tarball)
        return tar_path

    def scanLayer(self, imageID, layerID, layer_path):
        """Scan individual layer with ClamAV and report results

        Each layer should be scanned individually as some images may have common
        base layers this approach will speed up overall scan time without
        sacrificing security. Count each layerID to later determine if each
        layer is unique or is a common base layer across multiple images.

        Append important information to log file, saved as (layer_id).log

        `imageID` is accepted for interface compatibility and is not used.
        """
        # Update "seen" counter for each layer
        self.layerList[layerID] = self.layerList.get(layerID, 0) + 1

        # Docker images can have layers in common, do not repeat scan of layers
        if layerID in self.scannedLayers:
            return

        scanTime = datetime.datetime.utcnow()
        # Scan layer directory using ClamAV and log results
        log_path = '%s/%s.log' % (self.reportDirectory, layerID)
        subprocess.call(['clamscan', '--recursive', '--verbose',
                         '--log=%s' % log_path, layer_path])
        # ClamAV generated a log, append important information.
        with open(log_path, 'a') as logFile:
            logFile.write('Layer ID: %s\n' % layerID)
            logFile.write('Scan Time: %s\n' % str(scanTime))
            logFile.write('ClamAV Version: %s\n' % self.clamav_version)
            logFile.write('ClamAV Definitions: %s\n' % self.clamav_definitions)
        # Add this layer to list of previously scanned layers.
        self.scannedLayers.append(layerID)

    def removeTempFiles(self, dir_path, tar_path):
        """Remove temporary files generated by script if required by user.

        Only acts when needCleanup is the string "True". The tarball may
        already be gone (unTar deletes it after extraction), which is not
        treated as an error.
        """
        if self.needCleanup != "True":
            return
        shutil.rmtree(dir_path)
        try:
            os.remove(tar_path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    def safeNewDirectory(self, dir_path):
        """From a proposed path, return a directory path that is safe to use.

        If `dir_path` does not exist it is returned unchanged (and is not
        created here). If it already exists, a fresh sibling directory is
        created with tempfile.mkdtemp, using the original base name as
        prefix, and that new path is returned.
        """
        if os.path.exists(dir_path):
            prefix = os.path.basename(dir_path)
            directory = os.path.dirname(dir_path)
            dir_path = tempfile.mkdtemp(prefix=prefix, dir=directory)
        return dir_path

    def _safeMembers(self, tar, dir_path):
        """Yield the members of `tar` that would be written inside `dir_path`.

        Archive content is untrusted: members whose resolved destination lies
        outside `dir_path` (via '..', or through a previously extracted
        symlink) are reported and skipped. The check runs lazily, right
        before each member is extracted, so earlier members are on disk.
        """
        root = os.path.realpath(dir_path)
        for member in tar:
            target = os.path.realpath(os.path.join(root, member.name))
            if target != root and not target.startswith(root + os.sep):
                print("Skipping unsafe tar member '%s'" % member.name)
                continue
            yield member

    def unTar(self, tar_path, dir_path):
        """Untars a file to a directory, and then removes the tarball.

        Raises tarfile.TarError if `tar_path` cannot be read as a tar
        archive; in that case the tarball is left in place.
        """
        print("Extracting '%s' to '%s'" % (tar_path, dir_path))
        tar = tarfile.open(tar_path)
        try:
            tar.extractall(dir_path, members=self._safeMembers(tar, dir_path))
        finally:
            tar.close()
        os.remove(tar_path)

    def recursiveExtractor(self, dir_path):
        """Walks a directory untaring all nested tarfiles within it.

        Each nested tarball is extracted into a directory named after the
        tarball without its extension (or a fresh sibling directory when that
        name is taken), the tarball is removed, and the extracted directory
        is walked in turn. Symbolic links are not followed. A file that has a
        tar extension but cannot be read as a tar archive is reported and
        left in place.
        """
        # NOTE: This is crude but matches the previous behaviour. Because
        # os.path.splitext only yields the final suffix, the '.tar.gz' and
        # '.tar.bz2' entries can never match.
        file_extensions = ('.tar', '.tgz', '.tbz', '.tb2', '.tar.gz', '.tar.bz2')
        for item in os.listdir(dir_path):
            item_fullpath = os.path.join(dir_path, item)
            # Image content is untrusted; a symlink could lead outside the
            # extraction directory.
            if os.path.islink(item_fullpath):
                continue
            if os.path.isdir(item_fullpath):
                self.recursiveExtractor(item_fullpath)
            elif os.path.isfile(item_fullpath):
                filename, file_extension = os.path.splitext(item_fullpath)
                if file_extension not in file_extensions:
                    continue
                print("Found nested tarball at '%s'" % (item_fullpath))
                target_dir = self.safeNewDirectory(filename)
                try:
                    self.unTar(item_fullpath, target_dir)
                except tarfile.TarError as e:
                    print("Could not extract '%s': %s" % (item_fullpath, e))
                    continue
                # Walk the directory that was actually used for extraction.
                self.recursiveExtractor(target_dir)

    def scanImages(self):
        """Scan each image and add the results to the yaml reports

        Using the list of images to scan, execute the docker pull/save commands,
        then extract the nested tar file into a directory structure for
        ClamAV to scan.

        Loop over the list of layers in each image, skip the layer if it has
        already been scanned. Count the number of times the layer was seen, and
        if needed remove temporary files generated by script.

        Update yaml report.
        """
        tar_suffix = '.tar'
        for imageID in self.imageList:
            # Initialize report for this image to go into final YAML report
            image_results = {imageID: {'layers': [], 'unique_image': True}}
            # Save this image, extract it, and get the path to the directory
            tar_path = os.path.abspath(self.saveImageAsTar(imageID))
            # Strip only the trailing '.tar'; other occurrences in the path
            # must be kept.
            if tar_path.endswith(tar_suffix):
                dir_path = tar_path[:-len(tar_suffix)]
            else:
                dir_path = tar_path
            # Untar the image into its directory
            self.unTar(tar_path, dir_path)
            # Recursively extract all layers and tarballs within them
            self.recursiveExtractor(dir_path)
            for layerID in os.listdir(dir_path):
                layer_path = os.path.join(dir_path, layerID)
                # Layers are directories, scan the layers and their contents.
                if os.path.isdir(layer_path):
                    self.scanLayer(imageID, layerID, layer_path)
                    image_results[imageID]['layers'].append(layerID)
            # Add results of this image to YAML report, remove temp files.
            self.yaml_report['images'].append(image_results)
            self.removeTempFiles(dir_path, tar_path)

    def determineUniqueImages(self):
        """Determine if each image has any common base layers.

        Check each image to see if any of its layers were seen more than once
        during the entire scan. If so this is not a unique image as it shares
        at least one common base layer.
        """
        for image_entry in self.yaml_report['images']:
            # Each entry is a single-key dict: {image name: {...}}.
            image_name = next(iter(image_entry))
            details = image_entry[image_name]
            if any(self.layerList[layerID] > 1 for layerID in details['layers']):
                details['unique_image'] = False

    def generateReport(self):
        """Generate YAML report.

        Records the end time and writes the report to
        <reportDirectory>/clamAV_results-<YYYYmmdd-HHMMSS>.yaml.
        """
        endTime = datetime.datetime.utcnow()
        self.yaml_report['endTime'] = endTime
        report_path = '%s/clamAV_results-%s.yaml' % (
            os.path.abspath(self.reportDirectory),
            endTime.strftime("%Y%m%d-%H%M%S"))
        with open(report_path, 'w') as outfile:
            yaml.dump(self.yaml_report, outfile, default_flow_style=False)
if __name__ == '__main__':
    # Script entry point: build one Scanner and run the scan pipeline.
    # The instance is deliberately bound to the module-level name `scanner`;
    # keep that name, since other code in this file may refer to it.
    scanner = Scanner()
    # Steps run strictly in this order; each one relies on state prepared by
    # the steps before it.
    pipeline = (
        scanner.getEnvironmentVariables,
        scanner.createTempDirectories,
        scanner.dockerLogin,
        scanner.getPreviouslyScannedLayers,
        scanner.getImagesToScan,
        scanner.scanImages,
        scanner.determineUniqueImages,
        scanner.generateReport,
    )
    for step in pipeline:
        step()