Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rewritten download script and added additional prefetch check if zonefile changed #5

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions zonedata-download/config.sample.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"base_url": "https://czdap.icann.org",
"token": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
"base_url": "https://czds.icann.org",
"token": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
"prefetch": true
}
165 changes: 110 additions & 55 deletions zonedata-download/download.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,116 @@
#!/usr/bin/env python
# -*- coding:utf-8
import requests, json, sys, os, re, datetime

import requests
import json
import sys
from urlparse import urlparse
import os
class czdsException(Exception):
    """Error type raised by the downloader for configuration, HTTP and parsing problems."""

# Create one module-level HTTP session; `s` is the object that the
# script's later `s.get(...)` calls (and the optional `s.auth` /
# `s.verify` settings) are applied to.
s = requests.Session()
class czdsDownloader(object):
    """Download the zone files listed by the CZDS API into a dated folder.

    Typical use: create an instance, call readConfig() and then fetch().
    Configuration and protocol problems are reported as czdsException;
    network and file-system errors propagate unchanged.
    """

    # File name announced by the server: <YYYYMMDD>-<zone>-zone-data.txt.gz
    # (raw strings so that \d, \- and \. reach the regex engine untouched).
    file_syntax_re = re.compile(r'^(\d{8})\-([a-z\-0-9]+)\-zone\-data\.txt\.gz', re.IGNORECASE)
    content_disposition_header_re = re.compile(r'^attachment; filename="([^"]+)"', re.IGNORECASE)

    def __init__(self):
        """Create the HTTP session and remember when this run started."""
        self.s = requests.Session()
        # Used for the download folder name (date) and the file suffix (HHMM).
        self.td = datetime.datetime.today()

    def readConfig(self, configFilename = 'config.json'):
        """Load the JSON configuration file into self.conf.

        :param configFilename: path of the JSON file to read.
        :raises czdsException: if the file cannot be read or parsed, or if
            it lacks the 'base_url' or 'token' entry the other methods use.
        """
        try:
            # The context manager closes the handle even if parsing fails.
            with open(configFilename) as configFile:
                conf = json.load(configFile)
        except (IOError, OSError, ValueError):
            raise czdsException("Error loading '" + configFilename + "' file.")

        # Fail early with a clear message instead of a KeyError later on.
        for key in ('base_url', 'token'):
            if not isinstance(conf, dict) or key not in conf:
                raise czdsException("'" + key + "' parameter not found in the '" + configFilename + "' file.")

        self.conf = conf

    def prepareDownloadFolder(self):
        """Create today's download folder if needed and return its path."""
        directory = './zonedata-download/zonefiles.' + self.td.strftime('%Y%m%d')
        try:
            os.makedirs(directory)
        except OSError:
            # Already existing is fine (also when created concurrently);
            # anything else is a real error.
            if not os.path.isdir(directory):
                raise
        return directory

    def getZonefilesList(self):
        """Get all the files that need to be downloaded using CZDS API.

        :returns: the parsed JSON document; fetch() iterates over it and
            appends every item to 'base_url'.
        :raises czdsException: on a non-200 response or unparsable JSON.
        """
        r = self.s.get(self.conf['base_url'] + '/user-zone-data-urls.json?token=' + self.conf['token'])
        if r.status_code != 200:
            raise czdsException("Unexpected response from CZDS while fetching urls list.")

        try:
            return json.loads(r.text)
        except ValueError as e:
            raise czdsException("Unable to parse JSON returned from CZDS: " + str(e))

    def parseHeaders(self, headers):
        """Extract file information from the headers of a zone file response.

        :param headers: mapping of HTTP response headers; must contain
            'content-disposition' and 'content-length'.
        :returns: dict with 'date' and 'zone' (taken from the file name),
            'filename' and 'filesize' (content length as int).
        :raises czdsException: if a header is missing or malformed.
        """
        if 'content-disposition' not in headers:
            raise czdsException("Missing required 'content-disposition' header in HTTP call response.")
        if 'content-length' not in headers:
            raise czdsException("Missing required 'content-length' header in HTTP call response.")

        disposition = self.content_disposition_header_re.search(headers['content-disposition'])
        if not disposition:
            raise czdsException("'content-disposition' header does not match.")
        filename = disposition.group(1)

        parts = self.file_syntax_re.search(filename)
        if not parts:
            raise czdsException("filename does not match.")

        try:
            filesize = int(headers['content-length'])
        except ValueError:
            # Report a malformed header like every other header problem.
            raise czdsException("'content-length' header is not a number.")

        return {
            'date': parts.group(1),
            'zone': parts.group(2),
            'filename': filename,
            'filesize': filesize
        }

    def prefetchZone(self, path):
        """Do a HTTP HEAD call to check if filesize changed.

        :param path: URL path of the zone file, appended to 'base_url'.
        :returns: the dict produced by parseHeaders().
        :raises czdsException: on a non-200 response or bad headers.
        """
        # requests does not follow redirects for HEAD unless asked to; the
        # GET in fetchZone does, so make both calls reach the same response.
        r = self.s.head(self.conf['base_url'] + path, allow_redirects = True)
        if r.status_code != 200:
            raise czdsException("Unexpected response from CZDS while fetching '" + path + "'.")
        return self.parseHeaders(r.headers)

    def isNewZone(self, directory, hData):
        """Check if local zonefile exists and has identical filesize.

        :param directory: folder holding the files written by fetchZone().
        :param hData: dict as returned by parseHeaders().
        :returns: False if a file for the same date and zone with the same
            size is already present, True otherwise.
        """
        # Match exactly the names fetchZone() writes (<date>-<zone>-<HHMM>.zone.gz)
        # so that zone "a" is not mistaken for zone "a-b".
        localName = re.compile('^' + re.escape(hData['date'] + '-' + hData['zone'] + '-') + r'\d{4}\.zone\.gz$')
        for filename in os.listdir(directory):
            if localName.match(filename) \
                    and hData['filesize'] == os.path.getsize(os.path.join(directory, filename)):
                return False
        return True

    def fetchZone(self, directory, path, chunksize = 1024):
        """Do a regular GET call to fetch zonefile.

        :param directory: folder the file is written to.
        :param path: URL path of the zone file, appended to 'base_url'.
        :param chunksize: number of bytes read from the response at a time.
        :returns: path of the file that was written.
        :raises czdsException: on a non-200 response or bad headers.
        """
        r = self.s.get(self.conf['base_url'] + path, stream = True)
        try:
            if r.status_code != 200:
                raise czdsException("Unexpected response from CZDS while fetching '" + path + "'.")
            hData = self.parseHeaders(r.headers)
            outputFile = os.path.join(
                directory,
                hData['date'] + '-' + hData['zone'] + '-' + self.td.strftime('%H%M') + '.zone.gz')

            try:
                with open(outputFile, 'wb') as f:
                    for chunk in r.iter_content(chunksize):
                        f.write(chunk)
            except BaseException:
                # Do not leave a truncated zone file behind.
                if os.path.exists(outputFile):
                    os.remove(outputFile)
                raise
        finally:
            # A streamed response keeps its connection until it is closed.
            r.close()
        return outputFile

    def fetch(self):
        """Download every listed zone file into today's folder.

        When the 'prefetch' setting is true, a HEAD request is made first
        and files already present with the same size are skipped.
        """
        directory = self.prepareDownloadFolder()
        paths = self.getZonefilesList()
        prefetch = self.conf.get('prefetch')

        # Grab each file.
        for path in paths:
            if prefetch and not self.isNewZone(directory, self.prefetchZone(path)):
                continue
            self.fetchZone(directory, path)

# NOTE(review): from here to the end of the file two variants of the main
# script body appear interleaved and without indentation (the first `try:`
# is followed by two separate `except` clauses). Presumably these are the
# removed and the added side of the change -- confirm which lines belong
# together before running this.
# Load the config file
try:
configFile = open("config.json", "r")
config = json.load(configFile)
configFile.close()
except:
sys.stderr.write("Error loading config.json file.\n")
exit(1)
# 'token' and 'base_url' are both required; exit with status 1 if either
# one is missing.
if not config.has_key('token'):
sys.stderr.write("'token' parameter not found in the config.json file\n")
exit(1)
if not config.has_key('base_url'):
sys.stderr.write("'base_url' parameter not found in the config.json file\n")
exit(1)

# For development purposes, we sometimes run this against an environment with
# basic auth and a self-signed certificate. If these params are present, use
# them. If you're not a developer working on CZDAP itself, ignore these.
# Note that the mere presence of the 'ssl_skip_verify' key turns verification
# off; its value is not looked at.
if config.has_key('auth_user') and config.has_key('auth_pass'):
s.auth = (config['auth_user'], config['auth_pass'])
if config.has_key('ssl_skip_verify'):
s.verify = False

# Get all the files that need to be downloaded using CZDAP API.
# A non-200 status or a body that is not valid JSON ends the script with
# exit status 1.
r = s.get(config['base_url'] + '/user-zone-data-urls.json?token=' + config['token'])
if r.status_code != 200:
sys.stderr.write("Unexpected response from CZDAP. Are you sure your token and base_url are correct in config.json?\n")
exit(1)
try:
urls = json.loads(r.text)
except:
sys.stderr.write("Unable to parse JSON returned from CZDAP.\n")
exit(1)

# Grab each file.
# Every entry of `urls` is appended to base_url and fetched. On HTTP 200 the
# body is written in 1024-byte chunks to ./zonefiles/<name>.txt.gz, where
# <name> is the last path component of r.url; the folder is created on first
# use. Any other status is reported on stderr and nothing is written for
# that URL.
for url in urls:
r = s.get(config['base_url'] + url)
if r.status_code == 200:
parsed_url = urlparse(r.url)
filename = os.path.basename(parsed_url.path)
directory = './zonefiles'
if not os.path.exists(directory):
os.makedirs(directory)
path = directory + '/' + filename + '.txt.gz'
with open(path, 'wb') as f:
for chunk in r.iter_content(1024):
f.write(chunk)
else:
sys.stderr.write("Unexpected HTTP response for URL " + url + "\n")
# Class-based variant: build a czdsDownloader, read the configuration
# (readConfig() defaults to 'config.json') and download the zone files.
downloader = czdsDownloader()
downloader.readConfig()
downloader.fetch()
# Any exception is reported on stderr and the script exits with status 1.
# NOTE(review): "occoured" in the message below is a typo for "occurred".
except Exception, e:
sys.stderr.write("Error occoured: " + str(e) + "\n")
exit(1)