Skip to content

Commit

Permalink
Add python3 enabled hsproxy
Browse files Browse the repository at this point in the history
  • Loading branch information
raitisbe committed Jan 21, 2019
1 parent 11d0fbd commit 2d39b8a
Showing 1 changed file with 258 additions and 0 deletions.
258 changes: 258 additions & 0 deletions lib/hsproxy_py3.cgi
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
#!/usr/bin/env python

"""This is a blind proxy that we use to get around browser
restrictions that prevent JavaScript from loading pages that are not on
the same server as the script itself. This has several problems: it's less
efficient, it might break some sites, and it's a security risk because
people can use this proxy to browse the web and possibly do bad stuff
with it. It only loads pages via http and https, but it can load any
content type. It supports GET, POST, PUT and DELETE requests.

.. attribute:: allowedHosts

    list of allowed hosts or None

.. attribute:: encodings

    list of supported encodings for conversion
"""

import requests
try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse
import cgi, cgitb
import codecs
import sys, os
import re
import logging
from imp import reload
from io import BytesIO

# NOTE: the former ``reload(sys)`` call was only needed for the Python 2
# ``sys.setdefaultencoding('utf-8')`` hack; Python 3 has no such function,
# so reloading ``sys`` served no purpose and has been dropped.

# Render Python tracebacks into the HTTP response instead of a bare server
# error page.
# NOTE(review): this exposes internals to clients - consider disabling it
# outside of development.
cgitb.enable()

# Proxy config: mapping passed to requests as ``proxies=``. Leave empty to
# connect directly. Example entries:
PROXY = {
    # "http": "http://user:password@10.0.0.1:80",
    # "https": HTTPS
}

# Log level handed to logging.basicConfig() when the script is executed.
LEVEL = logging.DEBUG

# Hosts the proxy may contact, or None to allow every host, e.g.
# allowedHosts = ['www.openlayers.org', 'openlayers.org']
allowedHosts = None

# list of encodings, which will be used for conversion, when the input is
# not text/xml mimetype (tried in this order)
encodings = ["utf-8", "windows-1250", "iso-8859-2", "iso-8859-1", "windows-1251"]

def check_for_bom(s):
    """Strip a leading UTF-8 byte order mark from *s*, if there is one.

    Besides the real UTF-8 BOM, the "double encoded" form is recognised,
    i.e. BOM bytes that were decoded as Latin-1 and encoded as UTF-8 again.

    :param s: downloaded data, either ``bytes`` or ``str``
    :returns: *s* without the signature (same type as the input), or *s*
        unchanged when it does not start with a known signature
    """
    # Signatures must have the same type as the data: on Python 3
    # bytes.startswith(str) and str.startswith(bytes) raise TypeError.
    if isinstance(s, (bytes, bytearray)):
        signatures = (
            b'\xc3\xaf\xc2\xbb\xc2\xbf',  # double encoded BOM (6 bytes)
            codecs.BOM_UTF8,              # real BOM (3 bytes)
        )
    else:
        signatures = (
            '\xc3\xaf\xc2\xbb\xc2\xbf',   # double encoded BOM read as Latin-1
            '\xef\xbb\xbf',               # BOM bytes read as Latin-1
            '\ufeff',                     # correctly decoded BOM
        )

    logging.debug("Start")
    for sig in signatures:
        if s.startswith(sig):
            logging.debug("BOM has been found!")
            # Strip exactly the matched signature, whatever its length.
            return s[len(sig):]
    logging.debug("BOM check ended!")
    return s

def _retarget_stdout(encoding_name):
    """Make text written to ``sys.stdout`` be encoded as *encoding_name*.

    Wrapping the text stream in a ``codecs`` writer breaks ``print`` on
    Python 3 (the writer hands ``bytes`` to a text stream), so the stream
    is reconfigured in place where that is supported.
    """
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding=encoding_name)


def encode(data, toEncoding, contentType):
    """Convert downloaded data to the requested target encoding.

    The source encoding is guessed by walking the module level ``encodings``
    list. For ``text/xml`` an encoding is only tried when its name occurs in
    the data; after conversion every occurrence of that name is replaced by
    the target name. For other content types the first encoding that decodes
    the data without error is used.

    :param data: raw downloaded ``bytes``
    :param toEncoding: name of the target encoding; when empty or ``None``
        the data is returned untouched
    :param contentType: content type of the output
    :returns: converted ``bytes``, or *data* unchanged (after logging a
        warning) when no source encoding fits
    """
    if not toEncoding:
        # No target encoding requested, so there is nothing to convert.
        return data

    # try for each supported encoding
    for encoding in encodings:
        logging.debug("\n\n\nTrying to convert from %s to %s" % (encoding, toEncoding))
        regx = re.compile(encoding.encode(), re.IGNORECASE)
        fromEncoding = regx.search(data)
        logging.debug("Looking after %s: %s" % (encoding, fromEncoding))

        is_xml = contentType == "text/xml"
        if is_xml and not fromEncoding:
            # XML that does not mention this encoding: try the next one.
            continue

        if is_xml:
            logging.debug("Converting text/xml from %s to %s" % (encoding, toEncoding))
            data = check_for_bom(data)
        else:
            logging.debug("Trying to convert %s from %s to %s" % (contentType, encoding, toEncoding))

        try:
            text = data.decode(encoding)
        except UnicodeDecodeError:
            # Wrong guess for the source encoding; fall through to the next
            # candidate instead of aborting the whole conversion.
            logging.debug("Data is not valid %s, trying next encoding" % encoding)
            continue

        converted = text.encode(toEncoding)
        if is_xml:
            # NOTE(review): stdout is switched to the *source* encoding in
            # this branch, as in the previous implementation - confirm that
            # this should not be toEncoding.
            _retarget_stdout(encoding)
            # Pattern is bytes, so the replacement has to be bytes as well.
            return regx.sub(toEncoding.encode("ascii"), converted)

        _retarget_stdout(toEncoding)
        # do not replace anything, just make the conversion
        return converted

    # nothing was returned and so we end up here, give the data back as is
    logging.warning("Could not convert data to target encoding [%s], tryed one of %s" %
                    (toEncoding, encodings))

    return data

def _parse_cookie_header(header):
    """Turn a ``Cookie`` header value into a ``{name: value}`` dict."""
    cookies = {}
    for item in header.split(';'):
        # Split on the first '=' only: cookie values may contain '='.
        name, sep, value = item.strip().partition('=')
        if sep and name:
            cookies[name] = value
    return cookies


def _read_request_body():
    """Return the CGI request body as bytes, or None when there is none."""
    try:
        length = int(os.environ.get("CONTENT_LENGTH") or 0)
    except ValueError:
        length = 0
    if length <= 0:
        return None
    return sys.stdin.buffer.read(length)


def _reply_plain_text(message, status=None):
    """Write a complete text/plain CGI response to stdout."""
    if status:
        print("Status: %s" % status)
    print("Content-Type: text/plain")
    print()
    print(message)


def main():
    """Fetch the requested URL and write the answer as a CGI response.

    Reads ``REQUEST_METHOD``, ``QUERY_STRING``, ``HTTP_COOKIE``,
    ``HTTP_AUTHORIZATION``, ``CONTENT_LENGTH``, ``CONTENT_TYPE`` and
    ``SERVER_NAME`` from the environment. Query parameters:

    * ``url`` - address to fetch; an address not starting with ``http`` is
      resolved against this server
    * ``jsessionid`` - sent upstream as the only cookie (``JSESSIONID``);
      without it the cookies of the incoming request are forwarded
    * ``toEncoding`` - charset announced in the ``Content-Type`` header of
      the reply. The body itself is passed through unchanged.

    For POST, PUT and DELETE the request body is forwarded upstream.
    Nothing is returned; the response goes to ``sys.stdout``.
    """
    # Local import: urllib.parse.parse_qs replaces cgi.parse_qs, which is not
    # available on current Python 3.
    from urllib.parse import parse_qs, urlparse

    #Apache has to be set to obtain the environmental variable
    #See README.txt
    authDigest = os.environ.get("HTTP_AUTHORIZATION") or None

    method = os.environ["REQUEST_METHOD"]
    oscookie = os.environ.get("HTTP_COOKIE", "")
    logging.debug("Cookie: %s" % (oscookie))

    try:
        sends_body = method in ("POST", "PUT", "DELETE")
        if not sends_body and method != "GET":
            _reply_plain_text("Illegal request.", "405 Method Not Allowed")
            return

        # All parameters travel in the query string, whatever the method.
        params = parse_qs(os.environ.get("QUERY_STRING", ""))

        def first(name, default=None):
            values = params.get(name)
            return values[0] if values else default

        # Historical default target for body-carrying requests without url.
        url = first("url", "http://www.hsrs.cz" if sends_body else None)
        jsessionid = first("jsessionid")
        toEncoding = first("toEncoding")
        logging.debug("Parameter jsessionid : %s" % (jsessionid))
        logging.debug("Parameter url : %s" % (url))

        if not url:
            _reply_plain_text("Illegal request.", "400 Bad Request")
            return

        # toEncoding is echoed into a response header, so accept only a
        # plain charset token (no whitespace, CR/LF or separators).
        if toEncoding is not None and not re.fullmatch(
                r'[A-Za-z0-9][A-Za-z0-9._:+-]*', toEncoding):
            logging.warning("Ignoring invalid toEncoding parameter: %r" % (toEncoding,))
            toEncoding = None

        # Make the address absolute first, so that the host check below
        # looks at the host that is really going to be contacted.
        if not url.startswith("http"):
            url = "http://%s/%s" % (os.environ["SERVER_NAME"], url.lstrip("/"))
        host = urlparse(url).netloc

        if allowedHosts and host not in allowedHosts:
            # The server environment is deliberately not included in the
            # reply: it would leak credentials and configuration.
            _reply_plain_text(
                "This proxy does not allow you to access that location (%s)." % (host,),
                "502 Bad Gateway")
            return

        if not (url.startswith("http://") or url.startswith("https://")):
            _reply_plain_text("Illegal request.")
            return

        if jsessionid is not None:
            #From the GET parameter
            cookie = {'JSESSIONID': jsessionid}
        else:
            #Header cookie format example: JSESSIONID=DKAdNiwe; GUEST_LANGUAGE=en_GB;
            #Requests cookie format {JSESSIONID:DKAdNiwe, GUEST_LANGUAGE:en_GB}
            cookie = _parse_cookie_header(oscookie)

        #Resends Basic Authentication
        headers = {}
        if authDigest is not None:
            headers['Authorization'] = authDigest

        body = None
        if sends_body:
            body = _read_request_body()
            if body is not None and os.environ.get("CONTENT_TYPE"):
                headers['Content-Type'] = os.environ["CONTENT_TYPE"]

        logging.debug('Headeri')
        logging.debug(headers)

        with requests.Session() as session:
            req = requests.Request(method, url, cookies=cookie,
                                   headers=headers, data=body)
            # NOTE(review): verify=False disables TLS certificate checking.
            resp = session.send(req.prepare(), proxies=PROXY, verify=False)
            try:
                # Always relay the raw bytes; decoding is left to the client.
                content = resp.content
                content_type = resp.headers.get("content-type")
            finally:
                resp.close()

        #If the Content type is not specified mostly used text/xml will be used.
        if not content_type:
            content_type = "text/xml"
            logging.debug("Empty Content-Type set to text/xml %s" % content_type)

        # convert any *xml* content type to "text/xml", so that
        # browsers can parse it easy.
        # this applies especially to something like
        # application/vnd.ogc.wms_xml, produced by UMN MapServer
        if "xml" in content_type or "gml" in content_type:
            logging.debug("%s Content-Type set to text/xml" % content_type)
            content_type = "text/xml"
        else:
            logging.debug("Content-Type set to %s" % content_type)

        if toEncoding is not None:
            # Drop upstream parameters so that only one charset is announced.
            media_type = content_type.split(";")[0].strip()
            print("Content-Type: %s; charset=%s" % (media_type, toEncoding))
        else:
            print("Content-Type: %s" % content_type)

        # Http has to have one clear line after the Content type clause
        print()
        # Flush the text layer before writing bytes to the underlying buffer,
        # otherwise the body could overtake the headers.
        sys.stdout.flush()
        sys.stdout.buffer.write(content)
        sys.stdout.buffer.flush()

    except Exception as E:
        print ("Status: 500 Unexpected Error")
        print ("Content-Type: text/html")
        print ()
        print ("Some unexpected error occurred. Error text was:", E)
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print (exc_type, fname, exc_tb.tb_lineno)

# Script entry point: runs only when the file is executed directly (e.g. by
# the web server as a CGI program), not when it is imported.
if __name__ == "__main__":
    # Set up the root logger first so messages logged by main() honour LEVEL.
    logging.basicConfig(level=LEVEL)
    main()

0 comments on commit 2d39b8a

Please sign in to comment.