diff --git a/.pydevproject b/.pydevproject index 24aab1a..6a7714c 100644 --- a/.pydevproject +++ b/.pydevproject @@ -2,8 +2,8 @@ -Default -python 2.6 +ndg-httpsclient-py2.7 +python 2.7 /ndg_httpsclient diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..b58a1db --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,11 @@ +# +# MANIFEST.in file to enable inclusion of unit test data files and config +# +# NDG HTTPS Client Package +# +# P J Kershaw 17/01/12 +# +# Copyright (C) 2012 STFC +# +# Licence: BSD - See LICENCE file for details +recursive-include ndg/ *.crt *.key *.pem README diff --git a/ndg/httpsclient/https.py b/ndg/httpsclient/https.py index dba5a1d..f817613 100644 --- a/ndg/httpsclient/https.py +++ b/ndg/httpsclient/https.py @@ -1,4 +1,4 @@ -"""urllib2pyopenssl HTTPS module containing PyOpenSSL implementation of +"""ndg_httpsclient HTTPS module containing PyOpenSSL implementation of httplib.HTTPSConnection PyOpenSSL utility to make a httplib-like interface suitable for use with @@ -12,8 +12,15 @@ __revision__ = '$Id$' import logging import socket -from httplib import HTTPConnection, HTTPS_PORT -from urllib2 import AbstractHTTPHandler +import sys +from httplib import HTTPS_PORT +if sys.version_info < (2, 6, 2): + from ndg.httpsclient.httplib_proxy import HTTPConnection + from ndg.httpsclient.urllib2_proxy import AbstractHTTPHandler +else: + from httplib import HTTPConnection + from urllib2 import AbstractHTTPHandler + from OpenSSL import SSL @@ -49,15 +56,23 @@ def connect(self): """Create SSL socket and connect to peer """ if getattr(self, 'ssl_context', None): + if not isinstance(self.ssl_context, SSL.Context): + raise TypeError('Expecting OpenSSL.SSL.Context type for "' + 'ssl_context" keyword; got %r instead' % + self.ssl_context) ssl_context = self.ssl_context else: ssl_context = SSL.Context(self.__class__.default_ssl_method) sock = socket.create_connection((self.host, self.port), self.timeout) + + # Tunnel if using a proxy - ONLY available 
for Python 2.6.2 and above if getattr(self, '_tunnel_host', None): self.sock = sock self._tunnel() + self.sock = SSLSocket(ssl_context, sock) + # Go to client mode. self.sock.set_connect_state() @@ -82,6 +97,10 @@ def __init__(self, ssl_context, debuglevel=0): AbstractHTTPHandler.__init__(self, debuglevel) if ssl_context is not None: + if not isinstance(ssl_context, SSL.Context): + raise TypeError('Expecting OpenSSL.SSL.Context type for "' + 'ssl_context" keyword; got %r instead' % + ssl_context) self.ssl_context = ssl_context else: self.ssl_context = SSL.Context(SSL.SSLv23_METHOD) diff --git a/ndg/httpsclient/test/__init__.py b/ndg/httpsclient/test/__init__.py index 9ddf307..805f5e5 100644 --- a/ndg/httpsclient/test/__init__.py +++ b/ndg/httpsclient/test/__init__.py @@ -1,4 +1,4 @@ -"""unit tests package for urllib2pyopenssl +"""unit tests package for ndg_httpsclient PyOpenSSL utility to make a httplib-like interface suitable for use with urllib2 @@ -9,9 +9,20 @@ __license__ = "BSD - see LICENSE file in top-level directory" __contact__ = "Philip.Kershaw@stfc.ac.uk" __revision__ = '$Id$' +import os +import unittest + class Constants(object): + '''Convenience base class from which other unit tests can extend. 
It + sets the generic data directory path''' PORT = 4443 PORT2 = 4444 HOSTNAME = 'localhost' TEST_URI = 'https://%s:%d' % (HOSTNAME, PORT) - TEST_URI2 = 'https://%s:%d' % (HOSTNAME, PORT2) \ No newline at end of file + TEST_URI2 = 'https://%s:%d' % (HOSTNAME, PORT2) + + UNITTEST_DIR = os.path.dirname(os.path.abspath(__file__)) + SSL_CERT_FILENAME = 'localhost.crt' + SSL_CERT_FILEPATH = os.path.join(UNITTEST_DIR, 'pki', SSL_CERT_FILENAME) + SSL_PRIKEY_FILENAME = 'localhost.key' + SSL_PRIKEY_FILEPATH = os.path.join(UNITTEST_DIR, 'pki', SSL_PRIKEY_FILENAME) diff --git a/ndg/httpsclient/test/test_get.py b/ndg/httpsclient/test/test_utils.py similarity index 100% rename from ndg/httpsclient/test/test_get.py rename to ndg/httpsclient/test/test_utils.py diff --git a/ndg/httpsclient/urllib2_build_opener.py b/ndg/httpsclient/urllib2_build_opener.py index 93a6e91..b081677 100644 --- a/ndg/httpsclient/urllib2_build_opener.py +++ b/ndg/httpsclient/urllib2_build_opener.py @@ -7,12 +7,17 @@ __license__ = "BSD - see LICENSE file in top-level directory" __contact__ = "Philip.Kershaw@stfc.ac.uk" __revision__ = '$Id: pyopenssl.py 7929 2011-08-16 16:39:13Z pjkersha $' - import logging -from urllib2 import (OpenerDirector, ProxyHandler, UnknownHandler, HTTPHandler, - HTTPDefaultErrorHandler, HTTPRedirectHandler, +from urllib2 import (ProxyHandler, UnknownHandler, HTTPDefaultErrorHandler, FTPHandler, FileHandler, HTTPErrorProcessor) +import sys +if sys.version_info < (2, 6, 2): + from ndg.httpsclient.urllib2_proxy import (HTTPHandler, OpenerDirector, + HTTPRedirectHandler) +else: + from urllib2 import HTTPHandler, OpenerDirector, HTTPRedirectHandler + from ndg.httpsclient.https import HTTPSContextHandler log = logging.getLogger(__name__) diff --git a/ndg/httpsclient/urllib2_proxy.py b/ndg/httpsclient/urllib2_proxy.py new file mode 100644 index 0000000..06a2325 --- /dev/null +++ b/ndg/httpsclient/urllib2_proxy.py @@ -0,0 +1,262 @@ +''' +Created on 12 Jan 2012 + +@author: rwilkinson +''' 
+import base64 +import socket +import urlparse +from urllib import unquote, addinfourl +from urllib2 import _parse_proxy, URLError, HTTPError +from urllib2 import (AbstractHTTPHandler as _AbstractHTTPHandler, + BaseHandler as _BaseHandler, + HTTPRedirectHandler as _HTTPRedirectHandler, + Request as _Request, + OpenerDirector as _OpenerDirector) + +from ndg.httpsclient.httplib_proxy import HTTPConnection + + +class Request(_Request): + + def __init__(self, *args, **kw): + _Request.__init__(self, *args, **kw) + self._tunnel_host = None + + def set_proxy(self, host, type): + if self.type == 'https' and not self._tunnel_host: + self._tunnel_host = self.host + else: + self.type = type + self.__r_host = self.__original + self.host = host + + +class BaseHandler(_BaseHandler): + def proxy_open(self, req, proxy, type): + if req.get_type() == 'https': + orig_type = req.get_type() + proxy_type, user, password, hostport = _parse_proxy(proxy) + if proxy_type is None: + proxy_type = orig_type + if user and password: + user_pass = '%s:%s' % (unquote(user), unquote(password)) + creds = base64.b64encode(user_pass).strip() + req.add_header('Proxy-authorization', 'Basic ' + creds) + hostport = unquote(hostport) + req.set_proxy(hostport, proxy_type) + # let other handlers take care of it + return None + else: + return _BaseHandler.proxy_open(self, req, proxy, type) + +class AbstractHTTPHandler(_AbstractHTTPHandler): + def do_open(self, http_class, req): + """Return an addinfourl object for the request, using http_class. + + http_class must implement the HTTPConnection API from httplib. + The addinfourl return value is a file-like object. 
It also + has methods and attributes including: + - info(): return a mimetools.Message object for the headers + - geturl(): return the original request URL + - code: HTTP status code + """ + host = req.get_host() + if not host: + raise URLError('no host given') + + h = http_class(host, timeout=req.timeout) # will parse host:port + h.set_debuglevel(self._debuglevel) + + headers = dict(req.headers) + headers.update(req.unredirected_hdrs) + # We want to make an HTTP/1.1 request, but the addinfourl + # class isn't prepared to deal with a persistent connection. + # It will try to read all remaining data from the socket, + # which will block while the server waits for the next request. + # So make sure the connection gets closed after the (only) + # request. + headers["Connection"] = "close" + headers = dict( + (name.title(), val) for name, val in headers.items()) + + # A plain urllib2.Request (not the Request subclass in this module) + # may carry no _tunnel_host attribute, so read it defensively. + tunnel_host = getattr(req, '_tunnel_host', None) + if tunnel_host: + h.set_tunnel(tunnel_host) + try: + h.request(req.get_method(), req.get_selector(), req.data, headers) + r = h.getresponse() + except socket.error, err: # XXX what error? + raise URLError(err) + + # Pick apart the HTTPResponse object to get the addinfourl + # object initialized properly. + + # Wrap the HTTPResponse object in socket's file object adapter + # for Windows. That adapter calls recv(), so delegate recv() + # to read(). This weird wrapping allows the returned object to + # have readline() and readlines() methods. + + # XXX It might be better to extract the read buffering code + # out of socket._fileobject() and into a base class. 
+ + r.recv = r.read + fp = socket._fileobject(r, close=True) + + resp = addinfourl(fp, r.msg, req.get_full_url()) + resp.code = r.status + resp.msg = r.reason + return resp + + +class HTTPHandler(AbstractHTTPHandler): + + def http_open(self, req): + return self.do_open(HTTPConnection, req) + + http_request = AbstractHTTPHandler.do_request_ + +#if hasattr(httplib, 'HTTPS'): +# class HTTPSHandler(AbstractHTTPHandler): +# +# def https_open(self, req): +# return self.do_open(httplib.HTTPSConnection, req) +# +# https_request = AbstractHTTPHandler.do_request_ + + +class HTTPRedirectHandler(BaseHandler): + # maximum number of redirections to any single URL + # this is needed because of the state that cookies introduce + max_repeats = 4 + # maximum total number of redirections (regardless of URL) before + # assuming we're in a loop + max_redirections = 10 + + def redirect_request(self, req, fp, code, msg, headers, newurl): + """Return a Request or None in response to a redirect. + + This is called by the http_error_30x methods when a + redirection response is received. If a redirection should + take place, return a new Request to allow http_error_30x to + perform the redirect. Otherwise, raise HTTPError if no-one + else should try to handle this url. Return None if you can't + but another Handler might. + """ + m = req.get_method() + if (code in (301, 302, 303, 307) and m in ("GET", "HEAD") + or code in (301, 302, 303) and m == "POST"): + # Strictly (according to RFC 2616), 301 or 302 in response + # to a POST MUST NOT cause a redirection without confirmation + # from the user (of urllib2, in this case). In practice, + # essentially all clients do redirect in this case, so we + # do the same. 
+ # be conciliant with URIs containing a space + newurl = newurl.replace(' ', '%20') + newheaders = dict((k,v) for k,v in req.headers.items() + if k.lower() not in ("content-length", "content-type") + ) + return Request(newurl, + headers=newheaders, + origin_req_host=req.get_origin_req_host(), + unverifiable=True) + else: + raise HTTPError(req.get_full_url(), code, msg, headers, fp) + + # Implementation note: To avoid the server sending us into an + # infinite loop, the request object needs to track what URLs we + # have already seen. Do this by adding a handler-specific + # attribute to the Request object. + def http_error_302(self, req, fp, code, msg, headers): + # Some servers (incorrectly) return multiple Location headers + # (so probably same goes for URI). Use first header. + if 'location' in headers: + newurl = headers.getheaders('location')[0] + elif 'uri' in headers: + newurl = headers.getheaders('uri')[0] + else: + return + + # fix a possible malformed URL + urlparts = urlparse.urlparse(newurl) + if not urlparts.path: + urlparts = list(urlparts) + urlparts[2] = "/" + newurl = urlparse.urlunparse(urlparts) + + newurl = urlparse.urljoin(req.get_full_url(), newurl) + + # For security reasons we do not allow redirects to protocols + # other than HTTP, HTTPS or FTP. + newurl_lower = newurl.lower() + if not (newurl_lower.startswith('http://') or + newurl_lower.startswith('https://') or + newurl_lower.startswith('ftp://')): + raise HTTPError(newurl, code, + msg + " - Redirection to url '%s' is not allowed" % + newurl, + headers, fp) + + # XXX Probably want to forget about the state of the current + # request, although that might interact poorly with other + # handlers that also use handler-specific request attributes + new = self.redirect_request(req, fp, code, msg, headers, newurl) + if new is None: + return + + # loop detection + # .redirect_dict has a key url if url was previously visited. 
+ if hasattr(req, 'redirect_dict'): + visited = new.redirect_dict = req.redirect_dict + if (visited.get(newurl, 0) >= self.max_repeats or + len(visited) >= self.max_redirections): + raise HTTPError(req.get_full_url(), code, + self.inf_msg + msg, headers, fp) + else: + visited = new.redirect_dict = req.redirect_dict = {} + visited[newurl] = visited.get(newurl, 0) + 1 + + # Don't close the fp until we are sure that we won't use it + # with HTTPError. + fp.read() + fp.close() + + return self.parent.open(new, timeout=req.timeout) + + http_error_301 = http_error_303 = http_error_307 = http_error_302 + + inf_msg = "The HTTP server returned a redirect error that would " \ + "lead to an infinite loop.\n" \ + "The last 30x error message was:\n" + + +class OpenerDirector(_OpenerDirector): + def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + # accept a URL or a Request object + if isinstance(fullurl, basestring): + req = Request(fullurl, data) + else: + req = fullurl + if data is not None: + req.add_data(data) + + req.timeout = timeout + protocol = req.get_type() + + # pre-process request + meth_name = protocol+"_request" + for processor in self.process_request.get(protocol, []): + meth = getattr(processor, meth_name) + req = meth(req) + + response = self._open(req, data) + + # post-process response + meth_name = protocol+"_response" + for processor in self.process_response.get(protocol, []): + meth = getattr(processor, meth_name) + response = meth(req, response) + + return response \ No newline at end of file diff --git a/ndg/httpsclient/get.py b/ndg/httpsclient/utils.py similarity index 97% rename from ndg/httpsclient/get.py rename to ndg/httpsclient/utils.py index 3b87c7d..cc1a5b3 100644 --- a/ndg/httpsclient/get.py +++ b/ndg/httpsclient/utils.py @@ -3,7 +3,13 @@ import logging from optparse import OptionParser import os +import sys import urllib2 +if sys.version_info < (2, 6, 2): + from ndg.httpsclient.urllib2_proxy import HTTPHandler +else: + 
from urllib2 import HTTPHandler + import urlparse from ndg.httpsclient.urllib2_build_opener import build_opener @@ -69,7 +75,7 @@ def open_url(url, config): handlers = [cookie_handler] if config.debug: - http_handler = urllib2.HTTPHandler(debuglevel=debuglevel) + http_handler = HTTPHandler(debuglevel=debuglevel) https_handler = HTTPSContextHandler(config.ssl_context, debuglevel=debuglevel) handlers.extend([http_handler, https_handler]) diff --git a/setup.py b/setup.py index 17a38aa..c2ed6f8 100644 --- a/setup.py +++ b/setup.py @@ -8,14 +8,16 @@ setup( name='ndg_httpsclient', version="0.1.0", - description='Provides HTTPS for httplib and urllib2 using PyOpenSSL', - author='Richard Wilkinson', + description='Provides enhanced HTTPS support for httplib and urllib2 using ' + 'PyOpenSSL', + author='Richard Wilkinson and Philip Kershaw', long_description=open('README').read(), license='BSD - See LICENCE file for details', namespace_packages=['ndg'], packages=find_packages(), + install_requires = ['PyOpenSSL'], classifiers = [ - 'Development Status :: 5 - Production/Stable', + 'Development Status :: 3 - Alpha', 'Environment :: Console', 'Environment :: Web Environment', 'Intended Audience :: End Users/Desktop', @@ -36,7 +38,7 @@ ], zip_safe = False, entry_points = { - 'console_scripts': ['ndg_httpclient = myproxy.script:main', + 'console_scripts': ['ndg_httpclient = ndg.httpsclient.utils:main', ], } )