def isUri(str):
    """Check whether a value looks like an absolute HTTP(S) URI.

    The check is purely lexical: the value must begin with ``http://`` or
    ``https://`` (compared case-insensitively). Nothing after the scheme
    separator is validated.

    Args:
        str: Candidate value, normally a string such as the contents of a
            ``Location`` response header. The name shadows the builtin but
            is kept so existing keyword callers continue to work.

    Returns:
        The ``re`` match object (truthy) when the value starts with an
        HTTP or HTTPS scheme, otherwise ``None``. Values that cannot be
        matched against a text pattern at all (for example ``None`` from a
        header lookup that found nothing) also yield ``None``.
    """
    try:
        return re.match(r'^https?://', str, flags=re.IGNORECASE)
    except TypeError:
        # re.match raises TypeError for None and other non-text subjects;
        # such a value is simply "not a URI" rather than an error.
        return None

ERROR 404

@pytest.mark.parametrize("expected,input", [
    # Absolute HTTP(S) URIs: these must be recognised.
    (True, candidate) for candidate in (
        'http://example.com',
        'https://example.com',
        'HTTP://EXAMPLE.COM',
        'HTTPS://EXAMPLE.COM',
        'http://example.com/',
        'http://example.com/foo.bar',
        'https://www.example.com/foo?a=b&c=d',
    )
] + [
    # Empty, relative, other-scheme and malformed values: must be rejected.
    (False, candidate) for candidate in (
        '',
        'foo',
        'foo/bar.baz',
        'foo?a=b&c=d',
        '/',
        '/foo',
        '/foo/bar.baz',
        '/foo?a=b&c=d',
        './',
        './foo',
        './foo/bar.baz',
        './foo?a=b&c=d',
        '../',
        '../foo',
        '../foo/bar.baz',
        '../foo?a=b&c=d',
        '../../',
        '../../foo',
        '../../foo/bar.baz',
        '../../foo?a=b&c=d',
        'ftp://example.com',
        'httpd://example.com',
        'http//example.com',
        'http:/example.com',
        'http:example.com',
        'http.example.com',
        'http-bin.com',
    )
])
def test_isUri(expected, input):
    """Verify the truthiness of replay.isUri() for each sample value."""
    verdict = bool(replay.isUri(input))
    assert expected == verdict


# TODO: Have unit tests for each function in replay.py