#!/usr/bin/env python3
# Provenance: reconstructed from git patch
# 2d39b8af0dd678ee2fe7aaa76f34186d66da869e ("Add python3 enabled hsproxy",
# Raitis Berzins, 2019-01-21), which adds this file as lib/hsproxy_py3.cgi.

"""This is a blind proxy that we use to get around browser
restrictions that prevent the Javascript from loading pages not on the
same server as the Javascript. This has several problems: it's less
efficient, it might break some sites, and it's a security risk because
people can use this proxy to browse the web and possibly do bad stuff
with it. It only loads pages via http and https, but it can load any
content type. It supports GET and POST requests.

.. attribute:: allowedHosts

    list of allowed hosts or None

.. attribute:: encodings

    list of supported encodings for conversion

"""

import codecs
import html
import logging
import os
import re
import sys
from io import BytesIO  # unused here; kept from the original import list
from urllib.parse import parse_qs, urlparse

# Proxy config
PROXY = {
    # "http": "http://user:password@10.0.0.1:80",
    # "https": HTTPS
}

# loglevel
LEVEL = logging.DEBUG

# allowedHosts = ['www.openlayers.org', 'openlayers.org']
allowedHosts = None

# list of encodings, which will be used for conversion, when the input is
# not text/xml mimetype
encodings = ["utf-8", "windows-1250", "iso-8859-2", "iso-8859-1", "windows-1251"]

# HTTP methods main() is willing to relay.
_SUPPORTED_METHODS = ("GET", "POST", "PUT", "DELETE")

# Characters accepted in a client supplied charset name. Anything else
# (notably CR/LF) must never reach a response header.
_CHARSET_TOKEN = re.compile(r"[A-Za-z0-9][A-Za-z0-9._:+-]*")

# Byte order marks recognised at the start of ``bytes`` input.
_BYTE_BOMS = (
    # UTF-8 BOM that was decoded as Latin-1 and encoded as UTF-8 again.
    b"\xc3\xaf\xc2\xbb\xc2\xbf",
    codecs.BOM_UTF8,
)

# The same marks as they appear once the data has been decoded to ``str``.
_TEXT_BOMS = (
    "\xc3\xaf\xc2\xbb\xc2\xbf",
    "\xef\xbb\xbf",
    "\ufeff",
)


def check_for_bom(s):
    """Strip a leading UTF-8 byte order mark from *s*.

    :param s: ``bytes`` or ``str`` payload
    :returns: *s* without its leading BOM, same type as the input;
        *s* itself when no BOM is present
    """
    is_binary = isinstance(s, (bytes, bytearray))
    signatures = _BYTE_BOMS if is_binary else _TEXT_BOMS
    logging.debug("Start")
    for sig in signatures:
        if s.startswith(sig):
            logging.debug("BOM has been found!")
            # Cut exactly the matched signature; the marks differ in length.
            return s[len(sig):]
    logging.debug("BOM check ended!")
    return s


def encode(data, toEncoding, contentType):
    """Encode downloaded data to some other requested encoding.

    For ``text/xml`` the source encoding is the first entry of
    :data:`encodings` whose name occurs in the data; every occurrence of
    that name is replaced by *toEncoding*. For any other content type the
    entries of :data:`encodings` are tried in order until one decodes the
    data.

    :param data: ``bytes`` as received from the remote server
    :param toEncoding: name of the target encoding; a false value
        disables the conversion
    :param contentType: content type of the output
    :returns: converted ``bytes``, or *data* unchanged when no supported
        source encoding fits
    :raises UnicodeEncodeError: the text cannot be represented in
        *toEncoding*
    :raises LookupError: *toEncoding* is not a known codec
    """
    if not toEncoding:
        return data

    is_xml = contentType == "text/xml"

    # try for each supported encoding
    for encoding in encodings:
        logging.debug("\n\n\nTrying to convert from %s to %s" % (encoding, toEncoding))
        declared = re.search(
            re.escape(encoding.encode("ascii")), data, re.IGNORECASE
        )
        logging.debug("Looking after %s: %s" % (encoding, declared))

        if is_xml and not declared:
            # XML is only converted from the encoding it names itself.
            continue

        source = check_for_bom(data) if is_xml else data
        try:
            text = source.decode(encoding)
        except (UnicodeDecodeError, LookupError) as err:
            logging.debug("Decoding as %s failed: %s" % (encoding, err))
            continue

        if is_xml:
            logging.debug("Converting text/xml from %s to %s" % (encoding, toEncoding))
            # replace the encoding name in the data; the callable keeps
            # backslashes in toEncoding from being read as regex escapes
            text = re.sub(
                re.escape(encoding),
                lambda _match: toEncoding,
                text,
                flags=re.IGNORECASE,
            )
        else:
            logging.debug(
                "Trying to convert %s from %s to %s" % (contentType, encoding, toEncoding)
            )
        return text.encode(toEncoding)

    # nothing was returned and so we end up here
    logging.warning(
        "Could not convert data to target encoding [%s], tried one of %s"
        % (toEncoding, encodings)
    )
    return data


def _first_value(params, key):
    """Return the first value of *key* in a ``parse_qs`` result, or None."""
    values = params.get(key)
    return values[0] if values else None


def _parse_cookie_header(raw):
    """Turn a ``Cookie`` request header value into a name -> value dict."""
    jar = {}
    for item in raw.split(";"):
        # partition keeps "=" characters that belong to the value
        name, sep, value = item.strip().partition("=")
        if sep and name:
            jar[name] = value
    return jar


def _read_request_body(method):
    """Read the CGI request body from stdin; None when there is none."""
    if method == "GET":
        return None
    try:
        length = int(os.environ.get("CONTENT_LENGTH") or 0)
    except ValueError:
        length = 0
    if length <= 0:
        return None
    return sys.stdin.buffer.read(length)


def _send_illegal_request():
    """Emit the plain text answer used for requests that are not relayed."""
    print("Content-Type: text/plain")
    print()
    print("Illegal request.")


def main():
    """Request the data from the remote server and relay the answer.

    Reads ``url``, ``jsessionid`` and ``toEncoding`` from the query string
    of the CGI request, forwards the request (including cookies, the
    ``Authorization`` header and the request body) with the same HTTP
    method and writes the response body to stdout behind a
    ``Content-Type`` header. Any failure is reported as a 500 response.
    """
    # Apache has to be set to obtain the environmental variable
    # See README.txt
    authDigest = os.environ.get("HTTP_AUTHORIZATION") or None
    oscookie = os.environ.get("HTTP_COOKIE", "")

    # NOTE(review): with LEVEL = DEBUG this line and the header dump below
    # write session cookies and credentials to the log.
    logging.debug("Cookie: %s" % (oscookie))

    resp = None
    try:
        method = os.environ["REQUEST_METHOD"]
        if method not in _SUPPORTED_METHODS:
            raise ValueError("Unsupported request method: %s" % method)

        params = parse_qs(os.environ.get("QUERY_STRING", ""))
        url = _first_value(params, "url")
        jsessionid = _first_value(params, "jsessionid")
        toEncoding = _first_value(params, "toEncoding")
        logging.debug("Parameter jsessionid : %s" % (jsessionid))
        logging.debug("Parameter url : %s" % (url))

        if toEncoding is not None and not _CHARSET_TOKEN.fullmatch(toEncoding):
            # The value ends up in a response header; refuse anything that
            # could smuggle in extra headers.
            logging.warning("Ignoring invalid toEncoding parameter %r" % (toEncoding,))
            toEncoding = None

        if url is None and method != "GET":
            url = "http://www.hsrs.cz"
        if url is None:
            _send_illegal_request()
            return

        if not urlparse(url).scheme:
            # relative reference: resolve it against this server
            url = "http://%s/%s" % (os.environ["SERVER_NAME"], url.lstrip("/"))

        # Derive the host from the URL that is actually requested.
        target = urlparse(url)
        host = target.netloc

        if allowedHosts and host not in allowedHosts:
            print("Status: 502 Bad Gateway")
            print("Content-Type: text/plain")
            print()
            print("This proxy does not allow you to access that location (%s)." % (host,))
            return

        if target.scheme not in ("http", "https"):
            _send_illegal_request()
            return

        # Third-party dependency, imported here so that a missing package
        # is reported as a 500 response and the helpers above stay
        # importable without it.
        import requests

        if jsessionid is not None:
            # From the request parameter
            cookie = {"JSESSIONID": jsessionid}
        else:
            # Header cookie format example: JSESSIONID=DKAdNiwe; GUEST_LANGUAGE=en_GB;
            # Requests cookie format {JSESSIONID:DKAdNiwe, GUEST_LANGUAGE:en_GB}
            cookie = _parse_cookie_header(oscookie)

        # Resends Basic Authentication
        headers = {}
        if authDigest is not None:
            headers["Authorization"] = authDigest

        body = _read_request_body(method)
        if body and os.environ.get("CONTENT_TYPE"):
            headers["Content-Type"] = os.environ["CONTENT_TYPE"]

        logging.debug("Headeri")
        logging.debug(headers)

        req = requests.Request(method, url, cookies=cookie, headers=headers, data=body)
        prepped = req.prepare()

        with requests.Session() as session:
            # NOTE(review): verify=False doesn't check the certificate of
            # the remote server; kept as in the original, confirm that
            # this is still wanted.
            resp = session.send(prepped, proxies=PROXY, verify=False)

        # Relay the raw bytes; decoding is left to the client.
        # NOTE(review): encode() is not applied here, so toEncoding only
        # labels the response - confirm whether conversion is intended.
        content = resp.content

        # If the Content type is not specified mostly used text/xml will be used.
        content_type = resp.headers.get("content-type")
        if not content_type:
            content_type = "text/xml"
            logging.debug("Empty Content-Type set to text/xml %s" % content_type)

        # convert any *xml* content type to "text/xml", so that
        # browsers can parse it easy.
        # this applies especially to something like
        # application/vnd.ogc.wms_xml, produced by UMN MapServer
        if "xml" in content_type or "gml" in content_type:
            logging.debug("%s Content-Type set to text/xml" % content_type)
            content_type = "text/xml"
        else:
            logging.debug("Content-Type set to %s" % content_type)

        if toEncoding is not None:
            # drop upstream parameters so that charset is not given twice
            media_type = content_type.split(";", 1)[0].strip()
            print("Content-Type: %s; charset=%s" % (media_type, toEncoding))
        else:
            print("Content-Type: %s" % content_type)

        # Http has to have one clear line after the Content type clause
        print()
        sys.stdout.flush()
        sys.stdout.buffer.write(content)
        sys.stdout.buffer.flush()

    except Exception as E:
        logging.exception("Proxy request failed")
        print("Status: 500 Unexpected Error")
        print("Content-Type: text/html")
        print()
        # The message may echo request data, so escape it for HTML.
        print("Some unexpected error occurred. Error text was:", html.escape(str(E)))
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(html.escape(str(exc_type)), fname, exc_tb.tb_lineno)
    finally:
        if resp is not None:
            resp.close()


def _enable_cgitb():
    """Install the cgitb traceback handler where the module still exists."""
    try:
        import cgitb
    except ImportError:
        # cgitb was removed from the standard library in Python 3.13
        return
    cgitb.enable()


if __name__ == "__main__":
    _enable_cgitb()
    logging.basicConfig(level=LEVEL)
    main()