-
Notifications
You must be signed in to change notification settings - Fork 482
/
Copy pathbase.py
342 lines (279 loc) · 11.2 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
import re
from abc import ABCMeta
from abc import abstractmethod
from abc import abstractproperty
from .common.constants import ALLOWLIST_REGEXES
from detect_secrets.core.code_snippet import CodeSnippetHighlighter
from detect_secrets.core.constants import VerifiedResult
from detect_secrets.core.potential_secret import PotentialSecret
# NOTE: Section V-D of the whitepaper linked below suggests that there is an
# 80% chance of finding a multi-factor secret (e.g. username + password)
# within five lines of context, before and after a secret.
#
# This number can be tweaked if desired, at the cost of performance.
#
# https://www.ndss-symposium.org/wp-content/uploads/2019/02/ndss2019_04B-3_Meli_paper.pdf
#
# Passed as `lines_of_context` when building the code snippet that is handed
# to a plugin's `verify` method.
LINES_OF_CONTEXT = 5
class classproperty(property):
    """Property whose getter receives the owning class instead of an instance.

    The wrapped getter is called with the class through which the attribute
    was looked up, whether the lookup happened on the class itself or on one
    of its instances.
    """

    def __get__(self, cls, owner):
        # Bind the getter to the owning class as a classmethod would, then
        # call it straight away so attribute access yields the computed value.
        bound_getter = classmethod(self.fget).__get__(None, owner)
        return bound_getter()
class BasePlugin(object):
    """
    Abstract class that defines the Plugins API.

    :type secret_type: str
    :param secret_type: uniquely identifies the type of secret found in the
        baseline. e.g.
            {
                "hashed_secret": <hash>,
                "line_number": 123,
                "type": <secret_type>,
            }

        Be wary of modifying `secret_type` once it has been rolled out to
        clients: the hashed_secret uses this value to calculate a unique
        hash, so existing baselines will no longer match.

    :type disable_flag_text: str
    :param disable_flag_text: text used as a command line argument flag to
        disable this specific plugin scan. Does not include the `--` prefix.

    :type default_options: Dict[str, Any]
    :param default_options: configurable options to modify plugin behavior
    """
    __metaclass__ = ABCMeta

    @abstractproperty
    def secret_type(self):
        """Identifier of the kind of secret this plugin reports."""
        raise NotImplementedError

    def __init__(
        self,
        exclude_lines_regex=None,
        should_verify=False,
        false_positive_heuristics=None,
        **kwargs
    ):
        """
        :type exclude_lines_regex: str|None
        :param exclude_lines_regex: optional regex; lines matching it are
            skipped by `analyze_line`.

        :type should_verify: bool
        :param should_verify: when true, `analyze` and `adhoc_scan` pass
            every finding through `verify`.

        :type false_positive_heuristics: List[Callable]|None
        :param false_positive_heuristics: list of fp-heuristic functions
            applicable to this plugin

        Extra keyword arguments are accepted but not used here.
        """
        self.exclude_lines_regex = None
        if exclude_lines_regex:
            self.exclude_lines_regex = re.compile(exclude_lines_regex)

        self.should_verify = should_verify

        # Any falsy value (None, empty list, ...) becomes a fresh empty list.
        self.false_positive_heuristics = false_positive_heuristics or []

    @classproperty
    def disable_flag_text(cls):
        """Command line flag text (without `--`) that disables this plugin.

        Derived from the class name: a trailing `Detector` is dropped, the
        remainder is hyphenated and lowercased, and the result is wrapped as
        `no-<name>-scan`.
        """
        suffix = 'Detector'
        base_name = cls.__name__
        if base_name.endswith(suffix):
            base_name = base_name[:-len(suffix)]

        # Turn camel case into a hyphenated string: every character that
        # equals its own uppercase form, other than the first, starts a new
        # hyphen-separated chunk.
        pieces = []
        for position, char in enumerate(base_name):
            if position and char.upper() == char:
                pieces.append('-')
            pieces.append(char.lower())

        return 'no-{}-scan'.format(''.join(pieces))

    @classproperty
    def default_options(cls):
        """Default configurable options; none for the base class."""
        return dict()

    def analyze(self, file, filename):
        """Scan every line of a file and collect the secrets found.

        :param file: the File object itself.
        :param filename: string; filename of File object, used for creating
            PotentialSecret objects
        :returns: dictionary representation of a set (for random access by
            hash), mapping each PotentialSecret to itself:
            { detect_secrets.core.potential_secret.__hash__:
                detect_secrets.core.potential_secret }
        """
        potential_secrets = {}
        file_lines = tuple(file.readlines())

        for line_num, line in enumerate(file_lines, start=1):
            candidates = self.analyze_line(line, line_num, filename)

            if self.should_verify:
                kept = {}
                for candidate in candidates:
                    # Give the verifier the lines surrounding the finding.
                    snippet = CodeSnippetHighlighter().get_code_snippet(
                        file_lines,
                        candidate.lineno,
                        lines_of_context=LINES_OF_CONTEXT,
                    )
                    verdict = self.verify(
                        candidate.secret_value,
                        content=str(snippet),
                    )

                    if verdict == VerifiedResult.VERIFIED_TRUE:
                        candidate.is_verified = True

                    # Only findings verified as false are dropped;
                    # unverified ones are still reported.
                    if verdict != VerifiedResult.VERIFIED_FALSE:
                        kept[candidate] = candidate

                candidates = kept

            potential_secrets.update(candidates)

        return potential_secrets

    def analyze_line(self, string, line_num, filename):
        """Analyze a single line, unless it is allowlisted or excluded.

        :param string: string; the line to analyze
        :param line_num: integer; line number that is currently being analyzed
        :param filename: string; name of file being analyzed
        :returns: dictionary; empty when the line matches one of
            ALLOWLIST_REGEXES or the configured exclude_lines_regex

        NOTE: line_num and filename are used for PotentialSecret creation only.
        """
        for allowlist_regex in ALLOWLIST_REGEXES:
            if allowlist_regex.search(string):
                return {}

        if self.exclude_lines_regex and self.exclude_lines_regex.search(string):
            return {}

        return self.analyze_string_content(string, line_num, filename)

    @abstractmethod
    def analyze_string_content(self, string, line_num, filename):
        """
        :param string: string; the line to analyze
        :param line_num: integer; line number that is currently being analyzed
        :param filename: string; name of file being analyzed
        :returns: dictionary

        NOTE: line_num and filename are used for PotentialSecret creation only.
        """
        raise NotImplementedError

    @abstractmethod
    def secret_generator(self, string, *args, **kwargs):
        """Flags secrets in a given string, and yields the raw secret value.
        Used in self.analyze_line for PotentialSecret creation.

        :type string: str
        :param string: the secret to scan

        :rtype: iter
        :returns: Of all the identifiers found
        """
        raise NotImplementedError

    def adhoc_scan(self, string):
        """Report what this plugin says about a single line/secret.

        To support faster discovery, we want the ability to conveniently
        check what different plugins say regarding a single line/secret.

        This is very similar to self.analyze_line, but allows the flexibility
        for subclasses to add any other notable info (rather than just a
        PotentialSecret type). e.g. HighEntropyStrings adds their Shannon
        entropy in which they made their decision.

        :type string: str
        :param string: the string to analyze

        :rtype: str
        :returns: descriptive string that fits the format
            <classname>: <returned-value>
        """
        # TODO: Handle multiple secrets on single line.
        findings = self.analyze_line(
            string,
            line_num=0,
            filename='does_not_matter',
        )
        if not findings:
            return 'False'

        if not self.should_verify:
            return 'True'

        labels = {
            VerifiedResult.VERIFIED_FALSE: 'False (verified)',
            VerifiedResult.VERIFIED_TRUE: 'True (verified)',
            VerifiedResult.UNVERIFIED: 'True (unverified)',
        }

        # The first conclusive verification result wins; otherwise the line
        # is reported as unverified.
        outcome = VerifiedResult.UNVERIFIED
        for finding in findings:
            verdict = self.verify(finding.secret_value)
            if verdict != VerifiedResult.UNVERIFIED:
                outcome = verdict
                break

        return labels[outcome]

    def verify(self, token, content=''):
        """
        To increase accuracy and reduce false positives, plugins can also
        optionally declare a method to verify their status. This base
        implementation always reports the token as unverified.

        :type token: str
        :param token: secret found by current plugin

        :type content: str
        :param content: lines of context around identified secret

        :rtype: VerifiedResult
        """
        return VerifiedResult.UNVERIFIED

    def is_secret_false_positive(self, token):
        """
        Checks if the input secret is a false-positive according to
        this plugin's heuristics.

        :type token: str
        :param token: secret found by current plugin

        :returns: True as soon as one heuristic returns a truthy value for
            the token; False when none does or when no heuristics are set.
        """
        heuristics = self.false_positive_heuristics
        if not heuristics:
            return False

        for heuristic in heuristics:
            if heuristic(token):
                return True

        return False

    @property
    def __dict__(self):
        # Expose only the plugin's class name as its attribute dictionary.
        return dict(name=self.__class__.__name__)
class RegexBasedDetector(BasePlugin):
    """Parent class for regular-expression based detectors.

    To create a new regex-based detector, subclass this and set
    `secret_type` with a description and `denylist`
    with a sequence of regular expressions, like:

    class FooDetector(RegexBasedDetector):

        secret_type = "foo"

        denylist = (
            re.compile(r'foo'),
        )
    """
    __metaclass__ = ABCMeta

    @abstractproperty
    def denylist(self):
        """Sequence of compiled regular expressions that flag secrets."""
        raise NotImplementedError

    @staticmethod
    def assign_regex_generator(prefix_regex, secret_keyword_regex, secret_regex):
        """Generate assignment regex

        Takes 3 input parameters, each of which is a regex. The returned
        (case-insensitive) regex looks for a secret in the following format.

        <prefix_regex>(-|_|)<secret_keyword_regex> <assignment> <secret_regex>

        assignment would include =,:,:=,::
        keyname and value supports optional quotes
        """
        template = (
            r'{begin}{opt_open_square_bracket}{opt_quote}{prefix_regex}{opt_dash_undrscr}'
            '{secret_keyword_regex}{opt_quote}{opt_close_square_bracket}{opt_space}'
            '{assignment}{opt_space}{opt_quote}{secret_regex}{opt_quote}'
        )
        parts = {
            # Fixed building blocks of the pattern.
            'begin': r'(?:(?<=\W)|(?<=^))',
            'opt_quote': r'(?:"|\'|)',
            'opt_open_square_bracket': r'(?:\[|)',
            'opt_close_square_bracket': r'(?:\]|)',
            'opt_dash_undrscr': r'(?:_|-|)',
            'opt_space': r'(?: *)',
            'assignment': r'(?:=|:|:=|=>| +|::)',
            # Caller-supplied pieces.
            'prefix_regex': prefix_regex,
            'secret_keyword_regex': secret_keyword_regex,
            'secret_regex': secret_regex,
        }
        return re.compile(template.format(**parts), flags=re.IGNORECASE)

    def analyze_string_content(self, string, line_num, filename):
        """Wrap every identifier found in `string` in a PotentialSecret.

        :param string: string; the line to analyze
        :param line_num: integer; line number that is currently being analyzed
        :param filename: string; name of file being analyzed
        :returns: dictionary mapping each PotentialSecret to itself
        """
        found = {}
        for raw_value in self.secret_generator(string):
            potential_secret = PotentialSecret(
                self.secret_type,
                filename,
                raw_value,
                line_num,
            )
            found[potential_secret] = potential_secret

        return found

    def secret_generator(self, string, *args, **kwargs):
        """Yield every `findall` result of each denylist regex on `string`.

        :type string: str
        :param string: the string to scan
        """
        for pattern in self.denylist:
            hits = pattern.findall(string)
            for hit in hits:
                yield hit