diff --git a/.gitignore b/.gitignore index 661f23e..251bf26 100644 --- a/.gitignore +++ b/.gitignore @@ -52,4 +52,5 @@ coverage.xml # Sphinx documentation docs/_build/ -venv/ \ No newline at end of file +venv/ +.venv/ \ No newline at end of file diff --git a/server.py b/server.py index c02b8a4..b60e390 100644 --- a/server.py +++ b/server.py @@ -9,10 +9,10 @@ from functools import wraps from flask import request import re +from cachelib import SimpleCache app = Flask(__name__) -from werkzeug.contrib.cache import SimpleCache cache = SimpleCache() # URL templates fuer den Scraper @@ -21,7 +21,7 @@ "line_details": "/haltestellen/showline/{station_id:d}/{line_id:d}/", "schedule_table": "/haltestellen/aushang/{station_id:d}/", "schedule_pocket": "/haltestellen/miniplan/{station_id:d}/", - "departures": "/qr/{station_id:d}/" + "departures": "/qr/{station_id:d}/", } # Die brauchen wir bei jeder Anfrage @@ -29,7 +29,8 @@ "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36" } -def cached(timeout=5 * 60, key='view/%s'): + +def cached(timeout=5 * 60, key="view/%s"): def decorator(f): @wraps(f) def decorated_function(*args, **kwargs): @@ -40,7 +41,9 @@ def decorated_function(*args, **kwargs): rv = f(*args, **kwargs) cache.set(cache_key, rv, timeout=timeout) return rv + return decorated_function + return decorator @@ -52,24 +55,19 @@ def get_stations(): url = "https://www.kvb.koeln/haltestellen/overview/" r = requests.get(url, headers=HEADERS) soup = BeautifulSoup(r.text) - #print(soup.prettify()) + # print(soup.prettify()) mystations = [] for a in soup.find_all("a"): - #print(a, a.get("href"), a.text) + # print(a, a.get("href"), a.text) href = a.get("href") if href is None: continue - result = parse( - URL_TEMPLATES["station_details"], - href) + result = parse(URL_TEMPLATES["station_details"], href) if result is None: continue - mystations.append({ - "id": int(result["station_id"]), - "name": a.text - }) + mystations.append({"id": int(result["station_id"]), "name": a.text}) # sort by id - mystations = sorted(mystations, key=lambda k: k['id']) + mystations = sorted(mystations, key=lambda k: k["id"]) station_dict = {} for s in mystations: station_dict[s["id"]] = s["name"] @@ -86,16 +84,14 @@ def get_station_details(station_id): details = { "station_id": station_id, "name": stations[station_id], - "line_ids": set() + "line_ids": set(), } div = soup.find("ul", class_="info-list") for a in div.find_all("a"): href = a.get("href") if href is None: continue - result = parse( - URL_TEMPLATES["line_details"], - href) + result = parse(URL_TEMPLATES["line_details"], href) if result is None: continue details["line_ids"].add(result["line_id"]) @@ -107,21 +103,20 @@ def get_line_details(station_id, line_id): """ Findet heraus, welche Stationen eine Linie anfährt """ - url = "https://www.kvb.koeln/haltestellen/showline/%d/%d/" % ( - station_id, line_id) + url = "https://www.kvb.koeln/haltestellen/showline/%d/%d/" % (station_id, line_id) r = requests.get(url, headers=HEADERS) soup = BeautifulSoup(r.text) details = { "station_id": station_id, "line_id": line_id, "stations_forward": [], - "stations_reverse": [] + "stations_reverse": [], } station_key = "stations_forward" count = 0 for td in soup.find_all("td", class_=re.compile(".*station")): tdclass = td.get("class")[0] - if tdclass == u'station-top': + if tdclass == "station-top": count = count + 1 if count == 2: station_key = "stations_reverse" @@ -132,9 +127,7 @@ def get_line_details(station_id, line_id): href = a.get("href") if href is None: continue - result = parse( - URL_TEMPLATES["station_details"], - href) + result = parse(URL_TEMPLATES["station_details"], href) if result is None: continue details[station_key].append(int(result["station_id"])) @@ -153,9 +146,9 @@ def get_departures(station_id): for row in tables[0].find_all("tr"): tds = row.find_all("td") (line_id, direction, time) = (tds[0].text, tds[1].text, tds[2].text) - line_id = line_id.replace(u"\xa0", "") - direction = direction.replace(u"\xa0", "") - time = time.replace(u"\xa0", " ").strip().lower() + line_id = line_id.replace("\xa0", "") + direction = direction.replace("\xa0", "") + time = time.replace("\xa0", " ").strip().lower() if time == "sofort": time = "0" time = time.replace(" min", "") @@ -164,11 +157,9 @@ def get_departures(station_id): except: pass print(line_id, direction, time) - departures.append({ - "line_id": line_id, - "direction": direction, - "wait_time": time - }) + departures.append( + {"line_id": line_id, "direction": direction, "wait_time": time} + ) return departures @@ -180,8 +171,8 @@ def index(): "station_list": "/stations/", "station_details": "/stations/{station_id}/", "departures": "/stations/{station_id}/departures/", - "line_details": "/stations/{station_id}/lines/{line_id}/" - } + "line_details": "/stations/{station_id}/lines/{line_id}/", + }, } return json.dumps(output) @@ -211,17 +202,21 @@ def station_departuress(station_id): details = get_departures(station_id) return json.dumps(details) + # Add CORS header to every request @app.after_request def add_cors(resp): - resp.headers['Access-Control-Allow-Origin'] = request.headers.get('Origin','*') - resp.headers['Access-Control-Allow-Credentials'] = 'true' - resp.headers['Access-Control-Allow-Methods'] = 'POST, OPTIONS, GET' - resp.headers['Access-Control-Allow-Headers'] = request.headers.get('Access-Control-Request-Headers', 'Authorization' ) + resp.headers["Access-Control-Allow-Origin"] = request.headers.get("Origin", "*") + resp.headers["Access-Control-Allow-Credentials"] = "true" + resp.headers["Access-Control-Allow-Methods"] = "POST, OPTIONS, GET" + resp.headers["Access-Control-Allow-Headers"] = request.headers.get( + "Access-Control-Request-Headers", "Authorization" + ) if app.debug: - resp.headers['Access-Control-Max-Age'] = '1' + resp.headers["Access-Control-Max-Age"] = "1" return resp + if __name__ == "__main__": stations = get_stations() stations_reverse = {}