From c424cc6b772e9c97a331b8c18eaeaa8a2135e55c Mon Sep 17 00:00:00 2001 From: Angelo Dell'Aera Date: Wed, 14 Feb 2024 09:47:20 +0100 Subject: [PATCH 1/3] [Thug API] Proxy connect timeout --- docs/source/api.rst | 20 ++++++ tests/ThugAPI/test_ThugAPI.py | 6 ++ thug/DOM/HTTPSession.py | 4 +- thug/ThugAPI/IThugAPI.py | 20 ++++++ thug/ThugAPI/ThugAPI.py | 6 ++ thug/ThugAPI/ThugOpts.py | 19 +++++ thug/thug.py | 129 ++++++++++++++++++---------------- 7 files changed, 142 insertions(+), 62 deletions(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index e42aec3167..435c0cb1da 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -506,6 +506,26 @@ Thug API interface definition is reported below for convenience. @return: None """ + def get_proxy_connect_timeout(): + """ + get_proxy_connect_timeout + + Get the proxy connect timeout (in seconds) + + @return: the proxy connect timeout (in seconds) + """ + + def set_proxy_connect_timeout(timeout): + """ + set_proxy_connect_timeout + + Set the proxy connect timeout (in seconds) + + @param timeout: the proxy connect timeout (in seconds) + @type timeout: C{int} + @return: None + """ + def get_timeout(): """ get_timeout diff --git a/tests/ThugAPI/test_ThugAPI.py b/tests/ThugAPI/test_ThugAPI.py index e52cf473d7..ab042a5dce 100644 --- a/tests/ThugAPI/test_ThugAPI.py +++ b/tests/ThugAPI/test_ThugAPI.py @@ -202,6 +202,12 @@ def test_connect_timeout(self): self.thug_api.set_connect_timeout(20) assert self.thug_api.get_connect_timeout() in (20,) + def test_proxy_connect_timeout(self): + assert self.thug_api.get_proxy_connect_timeout() in (5,) + + self.thug_api.set_proxy_connect_timeout(10) + assert self.thug_api.get_proxy_connect_timeout() in (10,) + def test_broken_url(self): assert not self.thug_api.get_broken_url() diff --git a/thug/DOM/HTTPSession.py b/thug/DOM/HTTPSession.py index da5fd5e918..40ae680762 100644 --- a/thug/DOM/HTTPSession.py +++ b/thug/DOM/HTTPSession.py @@ -41,7 +41,9 @@ def __init__(self, 
proxy=None): self.filecount = 0 def __check_proxy_alive(self, hostname, port): - s = socket.create_connection((hostname, port), 5.0) + s = socket.create_connection( + (hostname, port), timeout=log.ThugOpts.proxy_connect_timeout + ) s.close() def __do_init_proxy(self, proxy): diff --git a/thug/ThugAPI/IThugAPI.py b/thug/ThugAPI/IThugAPI.py index 449cde28f6..174a6e5742 100644 --- a/thug/ThugAPI/IThugAPI.py +++ b/thug/ThugAPI/IThugAPI.py @@ -482,6 +482,26 @@ def set_connect_timeout(timeout): @return: None """ + def get_proxy_connect_timeout(): + """ + get_proxy_connect_timeout + + Get the proxy connect timeout (in seconds) + + @return: the proxy connect timeout (in seconds) + """ + + def set_proxy_connect_timeout(timeout): + """ + set_proxy_connect_timeout + + Set the proxy connect timeout (in seconds) + + @param timeout: the proxy connect timeout (in seconds) + @type timeout: C{int} + @return: None + """ + def get_timeout(): """ get_timeout diff --git a/thug/ThugAPI/ThugAPI.py b/thug/ThugAPI/ThugAPI.py index d3b40ea226..5337c2f762 100644 --- a/thug/ThugAPI/ThugAPI.py +++ b/thug/ThugAPI/ThugAPI.py @@ -295,6 +295,12 @@ def get_connect_timeout(self): def set_connect_timeout(self, timeout): log.ThugOpts.connect_timeout = timeout + def get_proxy_connect_timeout(self): + return log.ThugOpts.proxy_connect_timeout + + def set_proxy_connect_timeout(self, timeout): + log.ThugOpts.proxy_connect_timeout = timeout + def get_broken_url(self): return log.ThugOpts.broken_url diff --git a/thug/ThugAPI/ThugOpts.py b/thug/ThugAPI/ThugOpts.py index a173e715b4..8d7d61c50d 100644 --- a/thug/ThugAPI/ThugOpts.py +++ b/thug/ThugAPI/ThugOpts.py @@ -44,6 +44,7 @@ def __init__(self): self.extensive = False self._threshold = 0 self._connect_timeout = 10 + self._proxy_connect_timeout = 5 self._timeout = 600 self.ast_debug = False self.http_debug = 0 @@ -281,6 +282,24 @@ def set_connect_timeout(self, timeout): connect_timeout = property(get_connect_timeout, set_connect_timeout) + def 
get_proxy_connect_timeout(self): + return self._proxy_connect_timeout + + def set_proxy_connect_timeout(self, timeout): + try: + seconds = int(timeout) + except ValueError: + log.warning( + "[WARNING] Ignoring invalid proxy connect timeout value (should be an integer)" + ) + return + + self._proxy_connect_timeout = seconds + + proxy_connect_timeout = property( + get_proxy_connect_timeout, set_proxy_connect_timeout + ) + def get_timeout(self): return self._timeout diff --git a/thug/thug.py b/thug/thug.py index 67bf0120bf..8b4fac3589 100644 --- a/thug/thug.py +++ b/thug/thug.py @@ -44,72 +44,73 @@ def usage(self): thug [ options ] url Options: - -h, --help \tDisplay this help information - -V, --version \tDisplay Thug version - -i, --list-ua \tDisplay available user agents - -u, --useragent= \tSelect a user agent (use option -b for values, default: winxpie60) - -e, --events= \tEnable comma-separated specified DOM events handling - -w, --delay= \tSet a maximum setTimeout/setInterval delay value (in milliseconds) - -n, --logdir= \tSet the log output directory - -o, --output= \tLog to a specified file - -r, --referer \tSpecify a referer - -p, --proxy= \tSpecify a proxy (see below for format and supported schemes) - -m, --attachment \tSet the attachment mode - -l, --local \tAnalyze a locally saved page - -x, --local-nofetch \tAnalyze a locally saved page and prevent remote content fetching - -v, --verbose \tEnable verbose mode - -d, --debug \tEnable debug mode - -q, --quiet \tDisable console logging - -g, --http-debug \tEnable HTTP debug mode - -t, --threshold \tMaximum pages to fetch - -j, --extensive \tExtensive fetch of linked pages - -O, --connect-timeout \tSet the connect timeout (in seconds, default: 10 seconds) - -T, --timeout= \tSet the analysis timeout (in seconds, default: 600 seconds) - -c, --broken-url \tSet the broken URL mode - -z, --web-tracking \tEnable web client tracking inspection - -b, --async-prefetch \tEnable async prefetching mode - -k, 
--no-honeyagent \tDisable HoneyAgent support - -a, --image-processing \tEnable image processing analysis - -f, --screenshot \tEnable screenshot capturing - -E, --awis \tEnable AWS Alexa Web Information Service (AWIS) - -s, --no-down-prevent \tDisable download prevention mechanism + -h, --help \tDisplay this help information + -V, --version \tDisplay Thug version + -i, --list-ua \tDisplay available user agents + -u, --useragent= \tSelect a user agent (use option -b for values, default: winxpie60) + -e, --events= \tEnable comma-separated specified DOM events handling + -w, --delay= \tSet a maximum setTimeout/setInterval delay value (in milliseconds) + -n, --logdir= \tSet the log output directory + -o, --output= \tLog to a specified file + -r, --referer \tSpecify a referer + -p, --proxy= \tSpecify a proxy (see below for format and supported schemes) + -m, --attachment \tSet the attachment mode + -l, --local \tAnalyze a locally saved page + -x, --local-nofetch \tAnalyze a locally saved page and prevent remote content fetching + -v, --verbose \tEnable verbose mode + -d, --debug \tEnable debug mode + -q, --quiet \tDisable console logging + -g, --http-debug \tEnable HTTP debug mode + -t, --threshold \tMaximum pages to fetch + -j, --extensive \tExtensive fetch of linked pages + -O, --connect-timeout \tSet the connect timeout (in seconds, default: 10 seconds) + -C, --proxy-connect-timeout \tSet the proxy connect timeout (in seconds, default: 5 seconds) + -T, --timeout= \tSet the analysis timeout (in seconds, default: 600 seconds) + -c, --broken-url \tSet the broken URL mode + -z, --web-tracking \tEnable web client tracking inspection + -b, --async-prefetch \tEnable async prefetching mode + -k, --no-honeyagent \tDisable HoneyAgent support + -a, --image-processing \tEnable image processing analysis + -f, --screenshot \tEnable screenshot capturing + -E, --awis \tEnable AWS Alexa Web Information Service (AWIS) + -s, --no-down-prevent \tDisable download prevention mechanism 
Plugins: - -A, --adobepdf= \tSpecify Adobe Acrobat Reader version (default: 9.1.0) - -P, --no-adobepdf \tDisable Adobe Acrobat Reader plugin - -S, --shockwave= \tSpecify Shockwave Flash version (default: 10.0.64.0) - -R, --no-shockwave \tDisable Shockwave Flash plugin - -J, --javaplugin= \tSpecify JavaPlugin version (default: 1.6.0.32) - -K, --no-javaplugin \tDisable Java plugin - -L, --silverlight \tSpecify SilverLight version (default: 4.0.50826.0) - -N, --no-silverlight \tDisable SilverLight plugin + -A, --adobepdf= \tSpecify Adobe Acrobat Reader version (default: 9.1.0) + -P, --no-adobepdf \tDisable Adobe Acrobat Reader plugin + -S, --shockwave= \tSpecify Shockwave Flash version (default: 10.0.64.0) + -R, --no-shockwave \tDisable Shockwave Flash plugin + -J, --javaplugin= \tSpecify JavaPlugin version (default: 1.6.0.32) + -K, --no-javaplugin \tDisable Java plugin + -L, --silverlight \tSpecify SilverLight version (default: 4.0.50826.0) + -N, --no-silverlight \tDisable SilverLight plugin Classifiers: - --htmlclassifier= \tSpecify a list of additional (comma separated) HTML classifier rule files - --urlclassifier= \tSpecify a list of additional (comma separated) URL classifier rule files - --jsclassifier= \tSpecify a list of additional (comma separated) JS classifier rule files - --vbsclassifier= \tSpecify a list of additional (comma separated) VBS classifier rule files - --sampleclassifier= \tSpecify a list of additional (comma separated) Sample classifier rule files - --textclassifier= \tSpecify a list of additional (comma separated) Text classifier rule files - --cookieclassifier= \tSpecify a list of additional (comma separated) Cookie classifier rule files - --imageclassifier= \tSpecify a list of additional (comma separated) Image classifier rule files - --htmlfilter= \tSpecify a list of additional (comma separated) HTML filter files - --urlfilter= \tSpecify a list of additional (comma separated) URL filter files - --jsfilter= \tSpecify a list of additional 
(comma separated) JS filter files - --vbsfilter= \tSpecify a list of additional (comma separated) VBS filter files - --samplefilter= \tSpecify a list of additional (comma separated) Sample filter files - --textfilter= \tSpecify a list of additional (comma separated) Text filter files - --cookiefilter= \tSpecify a list of additional (comma separated) Cookie filter files - --imagefilter= \tSpecify a list of additional (comma separated) Image filter files + --htmlclassifier= \tSpecify a list of additional (comma separated) HTML classifier rule files + --urlclassifier= \tSpecify a list of additional (comma separated) URL classifier rule files + --jsclassifier= \tSpecify a list of additional (comma separated) JS classifier rule files + --vbsclassifier= \tSpecify a list of additional (comma separated) VBS classifier rule files + --sampleclassifier= \tSpecify a list of additional (comma separated) Sample classifier rule files + --textclassifier= \tSpecify a list of additional (comma separated) Text classifier rule files + --cookieclassifier= \tSpecify a list of additional (comma separated) Cookie classifier rule files + --imageclassifier= \tSpecify a list of additional (comma separated) Image classifier rule files + --htmlfilter= \tSpecify a list of additional (comma separated) HTML filter files + --urlfilter= \tSpecify a list of additional (comma separated) URL filter files + --jsfilter= \tSpecify a list of additional (comma separated) JS filter files + --vbsfilter= \tSpecify a list of additional (comma separated) VBS filter files + --samplefilter= \tSpecify a list of additional (comma separated) Sample filter files + --textfilter= \tSpecify a list of additional (comma separated) Text filter files + --cookiefilter= \tSpecify a list of additional (comma separated) Cookie filter files + --imagefilter= \tSpecify a list of additional (comma separated) Image filter files Logging: - -F, --file-logging \tEnable file logging mode (default: disabled) - -Z, --json-logging \tEnable 
JSON logging mode (default: disabled) - -W, --features-logging \tEnable features logging mode (default: disabled) - -G, --elasticsearch-logging\tEnable ElasticSearch logging mode (default: disabled) - -D, --mongodb-address= \tSpecify address and port of the MongoDB instance (format: host:port) - -Y, --no-code-logging \tDisable code logging - -U, --no-cert-logging \tDisable SSL/TLS certificate logging + -F, --file-logging \tEnable file logging mode (default: disabled) + -Z, --json-logging \tEnable JSON logging mode (default: disabled) + -W, --features-logging \tEnable features logging mode (default: disabled) + -G, --elasticsearch-logging \tEnable ElasticSearch logging mode (default: disabled) + -D, --mongodb-address= \tSpecify address and port of the MongoDB instance (format: host:port) + -Y, --no-code-logging \tDisable code logging + -U, --no-cert-logging \tDisable SSL/TLS certificate logging Proxy Format: scheme://[username:password@]host:port (supported schemes: http, socks4, socks5) @@ -141,7 +142,7 @@ def analyze(self): try: options, args = getopt.getopt( self.args, - "hViu:e:w:n:o:r:p:mzbkafEslxvdqgA:PS:RJ:KL:Nt:jO:T:cFZWGYUD:", + "hViu:e:w:n:o:r:p:mzbkafEslxvdqgA:PS:RJ:KL:Nt:jO:C:T:cFZWGYUD:", [ "help", "version", @@ -178,6 +179,7 @@ def analyze(self): "threshold=", "extensive", "connect-timeout=", + "proxy-connect-timeout=", "timeout=", "broken-url", "htmlclassifier=", @@ -367,6 +369,11 @@ def analyze(self): "--connect-timeout", ): self.set_connect_timeout(option[1]) + elif option[0] in ( + "-C", + "--proxy-connect-timeout", + ): + self.set_proxy_connect_timeout(option[1]) elif option[0] in ( "-T", "--timeout", From 80476cdcef70cf507935a19e1d8b7ff8ef006f6f Mon Sep 17 00:00:00 2001 From: Angelo Dell'Aera Date: Wed, 14 Feb 2024 10:01:47 +0100 Subject: [PATCH 2/3] Proxy connect timeout unit test improvement --- tests/ThugAPI/test_ThugAPI.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ThugAPI/test_ThugAPI.py b/tests/ThugAPI/test_ThugAPI.py index 
ab042a5dce..236cda0010 100644 --- a/tests/ThugAPI/test_ThugAPI.py +++ b/tests/ThugAPI/test_ThugAPI.py @@ -208,6 +208,9 @@ def test_proxy_connect_timeout(self): self.thug_api.set_proxy_connect_timeout(10) assert self.thug_api.get_proxy_connect_timeout() in (10,) + self.thug_api.set_proxy_connect_timeout('foo') + assert self.thug_api.get_proxy_connect_timeout() in (10,) + def test_broken_url(self): assert not self.thug_api.get_broken_url() From 23c2dfe02e5c4a9a7ea4ce919ac4a03088657035 Mon Sep 17 00:00:00 2001 From: Angelo Dell'Aera Date: Wed, 14 Feb 2024 10:13:02 +0100 Subject: [PATCH 3/3] Minor linting change --- tests/ThugAPI/test_ThugAPI.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ThugAPI/test_ThugAPI.py b/tests/ThugAPI/test_ThugAPI.py index 236cda0010..6a65aa0ae2 100644 --- a/tests/ThugAPI/test_ThugAPI.py +++ b/tests/ThugAPI/test_ThugAPI.py @@ -208,7 +208,7 @@ def test_proxy_connect_timeout(self): self.thug_api.set_proxy_connect_timeout(10) assert self.thug_api.get_proxy_connect_timeout() in (10,) - self.thug_api.set_proxy_connect_timeout('foo') + self.thug_api.set_proxy_connect_timeout("foo") assert self.thug_api.get_proxy_connect_timeout() in (10,) def test_broken_url(self):