robot: Provide a small scraper for the web UI.
Some functionality isn't available in the official RESTful API yet, so
this adds a small helper for scraping data off the Robot's web
interface until there is an official implementation in the RESTful API.

Signed-off-by: aszlig <aszlig@redmoonstudios.org>
aszlig committed Jul 17, 2013
1 parent e2e714d commit bbae95a
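
By way of illustration, here is a rough usage sketch of the new scraper
(hypothetical driver code, not part of this commit; the credentials are
placeholders and the code targets Python 2, matching the httplib/urllib
imports in the diff):

    from hetzner.robot import RobotConnection

    # Placeholder credentials; a real Robot web user and password are needed.
    conn = RobotConnection("r-user", "secret")

    # get_serverid() logs into the web interface lazily on first use and
    # resolves a main IP address to its numeric server ID.
    print conn.scraper.get_serverid("1.2.3.4")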
Showing 1 changed file with 147 additions and 1 deletion.
hetzner/robot.py
@@ -1,12 +1,15 @@
import re
import json
import hashlib

from base64 import b64encode
from urllib import urlencode
from httplib import HTTPSConnection, BadStatusLine, ResponseNotReady

from hetzner.server import Server

ROBOT_HOST = "robot-ws.your-server.de"
ROBOT_WEBHOST = "robot.your-server.de"


class RobotError(Exception):
@@ -21,12 +24,155 @@ class ConnectError(Exception):
    pass


class WebRobotError(RobotError):
    pass


class RobotWebInterface(object):
    """
    This is for scraping the web interface and can be used to implement
    features that are not yet available in the official API.
    """
    def __init__(self, user=None, passwd=None):
        self.conn = None
        self.session_cookie = None
        self.user = user
        self.passwd = passwd
        self.logged_in = False

    def update_session(self, response):
        """
        Parses the session cookie from the given response instance and updates
        self.session_cookie accordingly if a session cookie was recognized.
        """
        for key, value in response.getheaders():
            if key.lower() != 'set-cookie':
                continue
            if not value.startswith("robot="):
                continue
            self.session_cookie = value.split(';', 1)[0]

    def connect(self, force=False):
        """
        Establish a connection to the robot web interface if we're not yet
        connected. If 'force' is set to True, throw away the old connection and
        establish a new one, regardless of whether we are connected or not.
        """
        if force and self.conn is not None:
            self.conn.close()
            self.conn = None
        if self.conn is None:
            self.conn = HTTPSConnection(ROBOT_WEBHOST)

    def login(self, user=None, passwd=None):
        """
        Log into the robot web interface using self.user and self.passwd. If
        user/passwd are provided as arguments, those are used instead and
        self.user/self.passwd are updated accordingly.
        """
        if self.logged_in:
            return

        self.connect()

        # Update self.user and self.passwd in case we need to re-establish the
        # connection.
        if user is not None:
            self.user = user
        if passwd is not None:
            self.passwd = passwd

        if self.user is None or self.passwd is None:
            raise WebRobotError("Login credentials for the web user interface "
                                "are missing.")
        if self.user.startswith("#ws+"):
            raise WebRobotError("The user {0} is a dedicated web service user "
                                "and cannot be used for scraping the web user "
                                "interface.".format(self.user))

        # This is primarily for getting a first session cookie.
        response = self.request('/login', xhr=False)
        if response.status != 200:
            raise WebRobotError("Invalid status code {0} while visiting login"
                                " page".format(response.status))

        data = {'user': self.user, 'password': self.passwd}
        response = self.request('/login/check', data, xhr=False)

        if response.status != 302 or response.getheader('Location') is None:
            raise WebRobotError("Login to robot web interface failed.")

        self.logged_in = True

    def get_serverid(self, ip):
        """
        Retrieve and return server ID for the main IP address supplied by 'ip'.
        """
        self.login()
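        # The support page is assumed to contain one <option> element per
        # server, e.g. <option value="123456">#123456 (1.2.3.4)</option>;
        # the regex below captures the server ID and its main IP address,
        # and the dict() call turns that into a mapping of IP -> server ID.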
        serverid_re = re.compile(r'value="(\d+)"[^#]*#\1 \((.*?)\)')
        data = self.request('/support/server', {}).read()
        idstr = dict(map(reversed, serverid_re.findall(data))).get(str(ip))
        if idstr is None:
            raise WebRobotError("Server ID for IP address {0} not"
                                " found.".format(ip))
        return int(idstr)

    def request(self, path, data=None, xhr=True):
        """
        Send a request to the web interface, using 'data' for urlencoded POST
        data. If 'data' is None (which it is by default), a GET request is sent
        instead. A httplib.HTTPResponse is returned on success.
        By default this method uses headers for XMLHttpRequests, so if the
        request should be an ordinary HTTP request, set 'xhr' to False.
        """
        self.connect()

        headers = {'Connection': 'keep-alive'}
        if self.session_cookie is not None:
            headers['Cookie'] = self.session_cookie
        if xhr:
            headers['X-Requested-With'] = 'XMLHttpRequest'

        if data is None:
            method = 'GET'
            encoded = None
        else:
            method = 'POST'
            encoded = urlencode(data)
            headers['Content-Type'] = 'application/x-www-form-urlencoded'

        self.conn.request(method, path, encoded, headers)

        # Minimal peer certificate validation using a fingerprint
        cert = self.conn.sock.getpeercert(binary_form=True)
        fpr = hashlib.sha256(cert).hexdigest()
        # XXX: Using static fingerprint here until we have implemented #2.
        assert fpr == ('c34204f4ffd7df006311a9275fc62e42'
                       '8a1ccdd71514bfd4aafb7a5b435cbc17')

        try:
            response = self.conn.getresponse()
        except ResponseNotReady:
            # Connection closed, so we need to reconnect.
            # FIXME: Try to avoid endless loops here!
            self.connect(force=True)
            return self.request(path, data=data, xhr=xhr)

        self.update_session(response)
        return response


class RobotConnection(object):
    def __init__(self, user, passwd):
        self.user = user
        self.passwd = passwd
        self.conn = HTTPSConnection(ROBOT_HOST)

        # Provide this as a way to easily add unsupported API features.
        self.scraper = RobotWebInterface(user, passwd)

    def _request(self, method, path, data, headers, retry=1):
        self.conn.request(method.upper(), path, data, headers)
        try:
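
As an aside, the pinned SHA-256 fingerprint asserted in request() could be
recomputed with a sketch along these lines (not part of the commit; it only
uses Python 2 stdlib calls and assumes ROBOT_WEBHOST still serves the same
certificate):

    import ssl
    import hashlib

    # Fetch the peer certificate of the web interface and hash its DER
    # encoding; this should reproduce the fingerprint hard-coded above.
    pem = ssl.get_server_certificate(('robot.your-server.de', 443))
    der = ssl.PEM_cert_to_DER_cert(pem)
    print hashlib.sha256(der).hexdigest()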
