From 2d39b8af0dd678ee2fe7aaa76f34186d66da869e Mon Sep 17 00:00:00 2001
From: Raitis Berzins <raitisbe@gmail.com>
Date: Mon, 21 Jan 2019 13:27:24 +0200
Subject: [PATCH] Add python3 enabled hsproxy

---
 lib/hsproxy_py3.cgi | 258 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 258 insertions(+)
 create mode 100755 lib/hsproxy_py3.cgi

diff --git a/lib/hsproxy_py3.cgi b/lib/hsproxy_py3.cgi
new file mode 100755
index 0000000000..1f1629fa8c
--- /dev/null
+++ b/lib/hsproxy_py3.cgi
@@ -0,0 +1,258 @@
+#!/usr/bin/env python
+
+"""This is a blind proxy that we use to get around browser
+restrictions that prevent the Javascript from loading pages not on the
+same server as the Javascript.  This has several problems: it's less
+efficient, it might break some sites, and it's a security risk because
+people can use this proxy to browse the web and possibly do bad stuff
+with it.  It only loads pages via http and https, but it can load any
+content type. It supports GET and POST requests.
+
+.. attribute:: allowedHosts
+
+    list of allowed hosts or None
+
+.. attribute:: encodings
+
+    list of supported encodings for conversion
+
+"""
+
+import requests
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse
+import cgi, cgitb
+import codecs
+import sys, os
+import re
+import logging
+from imp import reload
+from io import BytesIO
+
+reload(sys)
+#sys.setdefaultencoding('utf-8')
+cgitb.enable()
+
+#Proxy config
+PROXY = {
+#    "http": "http://user:password@10.0.0.1:80",
+#    "https": HTTPS
+}
+
+#loglevel
+LEVEL = logging.DEBUG
+
+# allowedHosts = ['www.openlayers.org', 'openlayers.org']
+allowedHosts = None
+
+# list of encodings, which will be used for conversion, when the input is
+# not text/xml mimetype
+encodings = ["utf-8","windows-1250","iso-8859-2","iso-8859-1","windows-1251"]
+
+def check_for_bom(s):
+    bom_info = (
+        ('\xc3\xaf\xc2\xbb\xc2\xbf',    3, 'UTF-8'),
+        (codecs.BOM_UTF8,    3, 'UTF-8'),
+        )
+    logging.debug("Start")
+    for sig, siglen, enc in bom_info:
+        if s.startswith(sig):
+            logging.debug("BOM has been found!")
+            return s[siglen:]
+    logging.debug("BOM check ended!")
+    return s
+
+def encode(data,toEncoding,contentType):
+    """encode downloaded text to some other requested encoding
+    :param data: string object
+    :param contentType: content type of the output
+    :returns: converted string
+    """
+
+    # try for each supported encoding
+    for encoding in encodings:
+        logging.debug("\n\n\nTrying to convert from %s to %s" % (encoding,toEncoding))
+        regx = re.compile(encoding.encode(),re.IGNORECASE)
+        fromEncoding = re.search(regx,data)
+        logging.debug("Looking after %s: %s" % (encoding,fromEncoding))
+        try:
+            # read the data in given encoding, convert to target encoding
+            # replace potential encoding name in the data
+            if contentType == "text/xml" and fromEncoding:
+                logging.debug("Converting text/xml from %s to %s" % (encoding,toEncoding))
+                data = check_for_bom(data)
+                sys.stdout = codecs.getwriter(encoding)(sys.stdout)
+                return regx.sub(toEncoding,data.decode(encoding).encode(toEncoding))
+            elif not contentType == "text/xml":
+                logging.debug("Trying to convert %s from %s to %s" % (contentType,encoding,toEncoding))
+                sys.stdout = codecs.getwriter(toEncoding)(sys.stdout)
+                # do not replace anything, just make the conversion
+                return data.decode(encoding).encode(toEncoding)
+        except:
+            raise
+
+    # nothing was returned and so we end up here, raise error
+    logging.warning("Could not convert data to target encoding [%s], tryed one of %s" %\
+            (toEncoding,encodings))
+
+    return data
+
+def main():
+    """request the data from remote serser, based on POST or GET request
+    possibly make the encoding conversion"""
+
+    #Apache has to be set to obtain the environmental variable
+    #See README.txt
+    if os.environ.get("HTTP_AUTHORIZATION",""):
+        authDigest = os.environ.get("HTTP_AUTHORIZATION","")
+    else:
+        authDigest = None #Was none in Boiko times
+
+    method = os.environ["REQUEST_METHOD"]
+    try:
+        oscookie = os.environ.get("HTTP_COOKIE","")
+    except KeyError as e:
+        cookie = False
+
+    logging.debug("Cookie: %s" % (oscookie))
+
+    toEncoding = None
+
+    # read the data from POST request
+    if method == "POST" or\
+            method == "PUT" or \
+            method == "DELETE":
+        qs = os.environ["QUERY_STRING"]
+        d = cgi.parse_qs(qs)
+        if d.has_key("url"):
+            url = d["url"][0]
+        else:
+            url = "http://www.hsrs.cz"
+
+	#initialize jsessionid
+        if d.has_key("jsessionid"):
+            jsessionid = d["jsessionid"][0]
+        else:
+            jsessionid = None
+
+        if "toEncoding" in d:
+            toEncoding = d["toEncoding"][0]
+
+    # read the data from GET request
+    elif method == "GET":
+        fs = cgi.FieldStorage()
+        jsessionid = fs.getvalue('jsessionid')
+        logging.debug("Parameter jsessionid : %s" %(jsessionid))
+        url = fs.getvalue('url')
+        logging.debug("Parameter url : %s" %(url))
+        toEncoding = fs.getvalue('toEncoding', None)
+    try:
+        host = url.split("/")[2]
+
+        if not url.startswith("http"):
+            url = "http://%s/%s" % (os.environ["SERVER_NAME"],url)
+
+
+
+        if allowedHosts and not host in allowedHosts:
+            print ("Status: 502 Bad Gateway")
+            print ("Content-Type: text/plain")
+            print ()
+            print ("This proxy does not allow you to access that location (%s)." % (host,))
+            print ()
+            print (os.environ)
+
+        elif url.startswith("http://") or url.startswith("https://"):
+            session = requests.Session()
+            if jsessionid != None:
+                #From the GET parameter
+                cookie = {'JSESSIONID': jsessionid}
+            else:
+                #Header cookie format example: JSESSIONID=DKAdNiwe; GUEST_LANGUAGE=en_GB;
+                #Requests cookie format {JSESSIONID:DKAdNiwe, GUEST_LANGUAGE:en_GB}
+                oscookie = oscookie.split('; ')
+                handler = {}
+
+                for c in oscookie:
+                    c = c.split('=')
+                    if len( c ) > 1:
+                        handler[c[0]] = c[1]
+                cookie = handler
+
+            #Resends Basic Authentication
+            if authDigest != None:
+                headers = {'Authorization': authDigest}
+            else:
+                headers = {}
+            logging.debug('Headeri')
+            logging.debug (headers)
+            req = requests.Request(method, url,cookies=cookie, headers=headers)
+
+
+            prepped = req.prepare()
+
+            #verify=False doesn't check certificate
+            resp = session.send(prepped, proxies=PROXY, verify = False)
+
+
+            #encoding issue with requests. See http://docs.python-requests.org/en/latest/user/advanced/#encodings
+            try:
+                if not resp.headers['encoding']:
+                    print("TEEE")
+                    content = resp.text
+                else:
+                    print("TUUR")
+                    content = resp.read()
+            except Exception as E:
+                content = resp.content
+                #content = bytes(resp.text, 'utf-8')
+            #If the Content type is not specified mostly used text/xml will be used.
+            try:
+                content_type = resp.headers["content-type"]
+            except:
+                content_type = "text/xml"
+                logging.debug("Empty Content-Type set to text/xml %s" % content_type)
+                pass
+            # convert any *xml* content type to "text/xml", so that
+            # browsers can parse it easy.
+            # this applyes especially to something like
+            # application/vnd.ogc.wms_xml, produced by UMN MapServer
+            if content_type and \
+               (content_type.find("xml") > -1 or\
+               content_type.find("gml") > -1):
+                logging.debug("%s Content-Type set to text/xml" % content_type)
+                content_type = "text/xml"
+                print ("Content-Type: text/xml; charset=%s" % toEncoding)
+            else:
+                logging.debug("Content-Type set to %s" % content_type)
+                if toEncoding!=None:
+                    print ("Content-Type: %s; charset=%s" % (content_type,toEncoding))
+                else:
+                    print("Content-Type: %s;" % content_type)
+
+            # Http has to have one clear line after the Content type clausule
+            print()
+            sys.stdout.flush()
+            sys.stdout.buffer.write(content)
+
+            resp.close()
+        else:
+            print ("Content-Type: text/plain")
+            print ()
+            print ("Illegal request.")
+
+    except Exception as E:
+        print ("Status: 500 Unexpected Error")
+        print ("Content-Type: text/html")
+        print ()
+        print ("Some unexpected error occurred. Error text was:", E)
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+        print (exc_type, fname, exc_tb.tb_lineno)
+
+if __name__ == "__main__":
+    logging.basicConfig(level=LEVEL)
+    main()
+