Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename vars to match a consistent, standard style. #713

Merged
merged 3 commits into from
Aug 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions ipwb/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@ def checkArgs_index(args):


def checkArgs_replay(args):
suppliedIndexParameter = hasattr(args, 'index') and args.index is not None
likelyPiping = not sys.stdin.isatty()
supplied_index_parameter = hasattr(args, 'index') and \
args.index is not None
likely_piping = not sys.stdin.isatty()

if not suppliedIndexParameter and likelyPiping:
if not supplied_index_parameter and likely_piping:
cdxjIn = ''.join(sys.stdin.readlines())
if len(cdxjIn) == 0: # Daemon was not running, so nothing was indexed
print(('ERROR: The IPFS daemon must be running to pipe input from'
Expand All @@ -50,15 +51,15 @@ def checkArgs_replay(args):
with open(args.index, 'w') as f:
f.write(cdxjIn)

suppliedIndexParameter = True
supplied_index_parameter = True

proxy = None
if hasattr(args, 'proxy') and args.proxy is not None:
print(f'Proxying to {args.proxy}')
proxy = args.proxy

# TODO: add any other sub-arguments for replay here
if suppliedIndexParameter:
if supplied_index_parameter:
replay.start(cdxj_file_path=args.index, proxy=proxy)
else:
print('ERROR: An index file must be specified if not piping, e.g.,')
Expand Down
4 changes: 2 additions & 2 deletions ipwb/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from Crypto.Util.Padding import pad
import base64

from .__init__ import __version__ as ipwbVersion
from .__init__ import __version__ as ipwb_version

DEBUG = False

Expand Down Expand Up @@ -302,7 +302,7 @@ def cdx_cdxj_lines_from_file(warc_path, **enc_comp_opts):
def generate_cdxj_metadata(cdxj_lines=None):
metadata = ['!context ["http://tools.ietf.org/html/rfc7089"]']
metaVals = {
'generator': f'InterPlanetary Wayback {ipwbVersion}',
'generator': f'InterPlanetary Wayback {ipwb_version}',
'created_at': datetime.datetime.now().isoformat()
}
metaVals = f'!meta {json.dumps(metaVals)}'
Expand Down
99 changes: 50 additions & 49 deletions ipwb/replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
import base64

from werkzeug.routing import BaseConverter
from .__init__ import __version__ as ipwbVersion
from .__init__ import __version__ as ipwb_version


from flask import flash
Expand All @@ -76,7 +76,8 @@ def formatters():

@app.after_request
def set_server_header(response):
    """Stamp each outgoing response with the ipwb replay ``Server`` header.

    Registered via ``app.after_request``. Besides setting the header, it
    sets ``autocorrect_location_header`` to False on the response, then
    hands the same response object back.
    """
    response.autocorrect_location_header = False
    server_banner = ('InterPlanetary Wayback Replay/'
                     f'{ipwb_version}')
    response.headers['Server'] = server_banner
    return response

Expand Down Expand Up @@ -462,7 +463,7 @@ def generate_link_timemap_from_cdxj_lines(

# Extract and trim for host:port prepending
tmurl[2] = '' # Clear TM path
host_and_port = urlunsplit(tmurl) + '/'
host_and_port = f'{urlunsplit(tmurl)}/'

# unsurted URI will never have a scheme, add one
original_uri = f'http://{unsurt(original)}'
Expand Down Expand Up @@ -493,7 +494,7 @@ def generate_link_timemap_from_cdxj_lines(
tm_data += (
f',\n<{host_and_port}memento/{datetime}/{unsurt(surt_uri)}>; '
f'rel="{first_last_str}memento"; datetime="{dt_rfc1123}"')
return tm_data + '\n'
return f'{tm_data}\n'


def generate_cdxj_timemap_from_cdxj_lines(
Expand Down Expand Up @@ -549,15 +550,15 @@ def all_exception_handler(error):

@app.route('/ipwbadmin', strict_slashes=False)
def show_admin():
status = {'ipwbVersion': ipwbVersion,
'ipfsEndpoint': ipwb_utils.IPFSAPI_MUTLIADDRESS}
status = {'ipwb_version': ipwb_version,
'ipfs_endpoint': ipwb_utils.IPFSAPI_MUTLIADDRESS}
index_file = ipwb_utils.get_ipwb_replay_index_path()

memento_info = calculate_memento_info_in_index(index_file)

mCount = memento_info['mementoCount']
m_count = memento_info['memento_count']
unique_urirs = len(memento_info['surt_uris'].keys())
htmlCount = memento_info['htmlCount']
html_count = memento_info['html_count']
oldest_datetime = memento_info['oldest_datetime']
newest_datetime = memento_info['newest_datetime']

Expand All @@ -566,13 +567,13 @@ def show_admin():
# TODO: Calculate actual URI-R/M counts
indexes = [{'path': ipwb_utils.get_ipwb_replay_index_path(),
'enabled': True,
'urimCount': mCount,
'urirCount': unique_urirs}]
'urim_count': m_count,
'urir_count': unique_urirs}]
# TODO: Calculate actual values
summary = {'urimCount': mCount,
'urirCount': unique_urirs,
summary = {'urim_count': m_count,
'urir_count': unique_urirs,
'uris': uris,
'htmlCount': htmlCount,
'html_count': html_count,
'earliest': oldest_datetime,
'latest': newest_datetime}

Expand All @@ -585,14 +586,14 @@ def show_landing_page():
index_file = ipwb_utils.get_ipwb_replay_index_path()
memento_info = calculate_memento_info_in_index(index_file)

mCount = memento_info['mementoCount']
m_count = memento_info['memento_count']
unique_urirs = len(memento_info['surt_uris'].keys())
htmlCount = memento_info['htmlCount']
html_count = memento_info['html_count']

summary = {'index_path': index_file,
'urimCount': mCount,
'urirCount': unique_urirs,
'htmlCount': htmlCount}
'urim_count': m_count,
'urir_count': unique_urirs,
'html_count': html_count}
uris = get_uris_and_datetimes_in_cdxj(index_file)
return render_template('index.html', summary=summary, uris=uris)

Expand All @@ -617,7 +618,7 @@ def show_uri(path, datetime=None):

search_string = surted_uri
if datetime is not None:
search_string = surted_uri + ' ' + datetime
search_string = f'{surted_uri} {datetime}'

cdxj_line = get_cdxj_line_binarySearch(search_string, index_path)

Expand All @@ -632,10 +633,10 @@ def show_uri(path, datetime=None):
return generate_no_mementos_interface(path, datetime)

cdxj_parts = cdxj_line.split(" ", 2)
jObj = json.loads(cdxj_parts[2])
json_object = json.loads(cdxj_parts[2])
datetime = cdxj_parts[1]

digests = jObj['locator'].split('/')
digests = json_object['locator'].split('/')

class HashNotFoundError(Exception):
pass
Expand Down Expand Up @@ -683,38 +684,38 @@ def handler(signum, frame):
print(e)
return "An unknown exception occurred", 500

if 'encryption_method' in jObj:
keyString = None
while keyString is None:
if 'encryption_key' in jObj:
keyString = jObj['encryption_key']
if 'encryption_method' in json_object:
key_string = None
while key_string is None:
if 'encryption_key' in json_object:
key_string = json_object['encryption_key']
else:
askForKey = ('Enter a path for file',
' containing decryption key: \n> ')
keyString = raw_input(askForKey)
ask_for_key = ('Enter a path for file',
' containing decryption key: \n> ')
key_string = raw_input(ask_for_key)

padded_encryption_key = pad(keyString, AES.block_size)
padded_encryption_key = pad(key_string, AES.block_size)
key = base64.b64encode(padded_encryption_key)

nonce = b64decode(jObj['encryption_nonce'])
nonce = b64decode(json_object['encryption_nonce'])
cipher = AES.new(key, AES.MODE_CTR, nonce=nonce)
header = cipher.decrypt(base64.b64decode(header))
payload = cipher.decrypt(base64.b64decode(payload))

hLines = header.decode() \
.replace('\r', '') \
.replace('\n\t', '\t') \
.replace('\n ', ' ') \
.split('\n')
hLines.pop(0)
h_lines = header.decode() \
.replace('\r', '') \
.replace('\n\t', '\t') \
.replace('\n ', ' ') \
.split('\n')
h_lines.pop(0)

status = 200
if 'status_code' in jObj:
status = jObj['status_code']
if 'status_code' in json_object:
status = json_object['status_code']

resp = Response(payload, status=status)

for idx, hLine in enumerate(hLines):
for idx, hLine in enumerate(h_lines):
k, v = hLine.split(':', 1)

if k.lower() == 'transfer-encoding' and \
Expand All @@ -741,7 +742,7 @@ def handler(signum, frame):
<script>injectIPWBJS()</script>"""

new_payload = new_payload.decode('utf-8').replace(
'</html>', ipwb_js_inject + '</html>')
'</html>', f'{ipwb_js_inject}</html>')

resp.set_data(new_payload)

Expand Down Expand Up @@ -941,19 +942,19 @@ def calculate_memento_info_in_index(cdxj_file_path=INDEX_FILE):
print(f'Retrieving URI-Ms from {cdxj_file_path}')
index_file_contents = get_web_archive_index(cdxj_file_path)

errReturn = (0, 0)
err_return = (0, 0)

if not index_file_contents:
return errReturn
return err_return

lines = index_file_contents.strip().split('\n')

if not lines:
return errReturn
return err_return

memento_info = {
'mementoCount': 0,
'htmlCount': 0,
'memento_count': 0,
'html_count': 0,
'surt_uris': {},
'oldest_datetime': None,
'newest_datetime': None
Expand All @@ -963,7 +964,7 @@ def calculate_memento_info_in_index(cdxj_file_path=INDEX_FILE):
valid_cdxj_line = ipwb_utils.is_valid_cdxj_line(l)
metadata_record = ipwb_utils.is_cdxj_metadata_record(l)
if valid_cdxj_line and not metadata_record:
memento_info['mementoCount'] += 1
memento_info['memento_count'] += 1
(surt_uri, datetime, jsonInLine) = l.split(' ', 2)
if surt_uri not in memento_info['surt_uris']:
memento_info['surt_uris'][surt_uri] = 1
Expand All @@ -972,11 +973,11 @@ def calculate_memento_info_in_index(cdxj_file_path=INDEX_FILE):

j = json.loads(jsonInLine)

# Count only non-redirect HTML pages for htmlCount display
# Count only non-redirect HTML pages for html_count display
if j['mime_type'] and \
j['mime_type'].lower().startswith('text/html') and \
j['status_code'][0] != '3':
memento_info['htmlCount'] += 1
memento_info['html_count'] += 1

if memento_info['oldest_datetime'] is None:
memento_info['oldest_datetime'] = datetime
Expand Down
6 changes: 3 additions & 3 deletions ipwb/templates/admin.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ <h1><img src="./ipwbassets/logo.png" alt="ipwb" style="height: 50px; vertical-al
<h2>Status</h2>
<dl>
<dt>IPWB Version</dt>
<dd>{{ status.ipwbVersion }}</dd>
<dd>{{ status.ipwb_version }}</dd>
<dt>IPFS Endpoint</dt>
<dd>{{ status.ipfsEndpoint }} (<a href="#" rel="noreferrer">Change</a> | <a href="#" rel="noreferrer">Web UI</a>)</dd>
<dd>{{ status.ipfs_endpoint }} (<a href="#" rel="noreferrer">Change</a> | <a href="#" rel="noreferrer">Web UI</a>)</dd>
<dt>IPFS Status</dt>
<dd>
<label id="daemonStatusLabel">IPFS Daemon: </label><iframe src="ipfsdaemon/status" id="daemonStatus"></iframe>
Expand All @@ -48,7 +48,7 @@ <h2>Index</h2>
</section>
<section>
<h2>Collection</h2>
<p>{{ summary.urimCount }} mementos of {{ summary.urirCount }} resources with {{ summary.htmlCount }} HTML pages between {{ summary.earliest }} and {{ summary.latest }}</p>
<p>{{ summary.urim_count }} mementos of {{ summary.urir_count }} resources with {{ summary.html_count }} HTML pages between {{ summary.earliest }} and {{ summary.latest }}</p>

<!--<span id="htmlPages">0</span>
HTML page<span id="htmlPagesPlurality">s</span> listed-->
Expand Down
4 changes: 2 additions & 2 deletions ipwb/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ <h1><img src="/ipwbassets/logo.png" alt="ipwb" /></h1>
</form>
</div>
<footer>
<p id="memCount"><a id="memCountListLink"><span id="memCountInt">{{ summary.urimCount}}</span></a> Memento<span id="plural">s</span> available</p>
<p id="memCount"><a id="memCountListLink"><span id="memCountInt">{{ summary.urim_count}}</span></a> Memento<span id="plural">s</span> available</p>
<details>
<summary>Options and Details...</summary>
<p><label>Archive Index(es): </label>{{ summary.index_path }}</p>
Expand All @@ -52,7 +52,7 @@ <h1><img src="/ipwbassets/logo.png" alt="ipwb" /></h1>
</footer>
<div id="uris" class="hidden">
<h3 id="urisHeader"><abbr title="Uniform Resource Identifiers">URIs</abbr> locally available</h3>
<h4 id="htmlCountHeader">{{ pluralize(summary.urimCount, 'memento', 'mementos') }} of {{ pluralize(summary.urirCount, 'resource', 'resources') }} with <span id="htmlPages">{{ summary.htmlCount }}</span>
<h4 id="htmlCountHeader">{{ pluralize(summary.urim_count, 'memento', 'mementos') }} of {{ pluralize(summary.urir_count, 'resource', 'resources') }} with <span id="htmlPages">{{ summary.html_count }}</span>
HTML page<span id="htmlPagesPlurality">s</span> listed
<button id="showEmbeddedURI" data-defaultValue="Show All" data-activatedValue="Show Only HTML Pages">Show All</button></h4>
<ul id="uriList"></ul>
Expand Down
6 changes: 3 additions & 3 deletions tests/testUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ def get_random_string(n):


def count_cdxj_entries(cdxj_data):
    """Count the non-metadata lines in a CDXJ string.

    Lines whose first character is ``!`` are treated as metadata and are
    excluded from the count. Empty lines are skipped as well, so empty
    input (or data containing blank lines) yields a count instead of an
    ``IndexError`` from indexing into an empty string.

    :param cdxj_data: CDXJ content as a single newline-delimited string.
    :return: Number of lines that are neither empty nor metadata.
    """
    urim_count = 0
    for line in cdxj_data.strip().split('\n'):
        # ''.split('\n') yields [''], so guard against empty lines
        # before looking at the leading character.
        if line and not line.startswith('!'):
            urim_count += 1
    return urim_count


def start_replay(warc_filename):
Expand Down