Commit

Black
Format with black!
edsu committed Sep 14, 2023
1 parent e57ad4a commit 68e1e86
Showing 6 changed files with 95 additions and 47 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
@@ -23,5 +23,8 @@ jobs:
pip install poetry
poetry install
- name: Check formatting
run: black --check .

- name: Test with pytest
run: poetry run pytest -v
14 changes: 7 additions & 7 deletions memento_cli/__init__.py
@@ -9,7 +9,7 @@ def cli():


@cli.command()
@click.argument('url')
@click.argument("url")
def list(url):
# auto-detect the timemap if it's a memento supporting web archive
timemap_url = memento.get_timemap_url(url)
@@ -23,15 +23,15 @@ def list(url):


@cli.command()
@click.argument('start-url')
@click.argument('end-url')
@click.option('--text', help='text to look for on the page')
@click.option('--missing', is_flag=True, help='missing text to look for on the page')
@click.option('--show-browser', is_flag=True, help='see the browser')
@click.argument("start-url")
@click.argument("end-url")
@click.option("--text", help="text to look for on the page")
@click.option("--missing", is_flag=True, help="missing text to look for on the page")
@click.option("--show-browser", is_flag=True, help="see the browser")
def bisect(start_url, end_url, text, missing, show_browser):
print()
url = memento.bisect_urls(start_url, end_url, text, missing, show_browser)
click.echo(f'\rFound your archive snapshot: {url}')
click.echo(f"\rFound your archive snapshot: {url}")


def main():
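
For context, the click commands defined above can be exercised without installing the console script by using click's test runner. A minimal sketch (the archived URL is borrowed from the test suite below and needs network access):

    from click.testing import CliRunner

    from memento_cli import cli

    runner = CliRunner()
    result = runner.invoke(
        cli,
        [
            "list",
            "https://web.archive.org/web/20230621094005/https://help.twitter.com/en/rules-and-policies/hateful-conduct-policy",
        ],
    )
    print(result.output)
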
1 change: 1 addition & 0 deletions memento_cli/browser.py
@@ -2,6 +2,7 @@
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options


class Browser:
"""
A class for fetching text from a web page using a browser. This ensures
48 changes: 30 additions & 18 deletions memento_cli/memento.py
@@ -9,15 +9,20 @@
from .browser import Browser


Memento = namedtuple('Memento', ['url', 'datetime'])
Memento = namedtuple("Memento", ["url", "datetime"])


def get_timemap_url(url):
"""
Look for a Memento Timemap URL in the response headers for a web resource.
"""
resp = requests.get(url)
if resp.status_code == 200 and 'timemap' in resp.links and 'url' in resp.links['timemap']:
return resp.links['timemap']['url']
if (
resp.status_code == 200
and "timemap" in resp.links
and "url" in resp.links["timemap"]
):
return resp.links["timemap"]["url"]
return None
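
For reference, a Memento-aware archive advertises its TimeMap in an HTTP Link header, and requests exposes that header as resp.links, keyed by rel, which is what the check above relies on. A minimal sketch of the parsing (the header value and URL are made up for illustration):

    import requests

    # An illustrative Link header of the kind a Memento-aware archive returns:
    header = '<https://archive.example/timemap/link/https://example.com/>; rel="timemap"'

    # requests.utils.parse_header_links is the same parser that populates resp.links.
    links = requests.utils.parse_header_links(header)
    by_rel = {link["rel"]: link for link in links}
    print(by_rel["timemap"]["url"])
    # https://archive.example/timemap/link/https://example.com/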


@@ -27,14 +32,18 @@ def get_mementos(timemap_url) -> list[Memento]:
"""
resp = requests.get(timemap_url)
mementos = []
if resp.headers.get('content-type') == 'application/link-format':
if resp.headers.get("content-type") == "application/link-format":
for link in parse_links(resp.text):
if link.get('rel') == 'memento':
mementos.append(Memento(
link['url'],
datetime.datetime.strptime(link['datetime'], "%a, %d %b %Y %H:%M:%S GMT")
))

if link.get("rel") == "memento":
mementos.append(
Memento(
link["url"],
datetime.datetime.strptime(
link["datetime"], "%a, %d %b %Y %H:%M:%S GMT"
),
)
)

return mementos
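
To illustrate the parsing step above: each TimeMap entry carries an RFC 1123 style datetime string, which strptime turns into a datetime for sorting. A small self-contained example (the link dict mirrors an entry from the test data below):

    import datetime
    from collections import namedtuple

    Memento = namedtuple("Memento", ["url", "datetime"])

    link = {
        "url": "https://web.archive.org/web/19961219002950/http://www.nytimes.com:80/",
        "rel": "memento",
        "datetime": "Thu, 19 Dec 1996 00:29:50 GMT",
    }
    m = Memento(
        link["url"],
        datetime.datetime.strptime(link["datetime"], "%a, %d %b %Y %H:%M:%S GMT"),
    )
    print(m.datetime)  # 1996-12-19 00:29:50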


@@ -45,14 +54,18 @@ def parse_links(text) -> list[dict]:
# lean on requests for the parsing, but prep the text to allow for
# whitespace, since parse_header_links is designed for a single-line header

text = re.sub(r'^\s+', '', text) # strip leading whitespace
text = re.sub(r',\s*$', '', text) # strip trailing comma and any optional whitespace
text = re.sub(r'",\r?\n', ', ', text) # remove dos/unix newlines between links
text = re.sub(r"^\s+", "", text) # strip leading whitespace
text = re.sub(
r",\s*$", "", text
) # strip trailing comma and any optional whitespace
text = re.sub(r'",\r?\n', ", ", text) # remove dos/unix newlines between links

return requests.utils.parse_header_links(text)
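
To show what that cleanup enables, here is a small made-up TimeMap in application/link-format and the dicts it parses into (URLs and dates are illustrative; the import path assumes the package layout shown in this commit):

    from memento_cli.memento import parse_links

    timemap = """<http://example.com/>; rel="original",
    <http://archive.example/web/19970101000000/http://example.com/>; rel="memento"; datetime="Wed, 01 Jan 1997 00:00:00 GMT",
    <http://archive.example/web/19980101000000/http://example.com/>; rel="memento"; datetime="Thu, 01 Jan 1998 00:00:00 GMT"
    """

    links = parse_links(timemap)
    print(len(links))            # 3
    print(links[1]["rel"])       # memento
    print(links[1]["datetime"])  # Wed, 01 Jan 1997 00:00:00 GMT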


def bisect_urls(start_url, end_url, text=None, missing=False, show_browser=False) -> str:
def bisect_urls(
start_url, end_url, text=None, missing=False, show_browser=False
) -> str:
timemap_url = get_timemap_url(start_url)
mementos = sorted(get_mementos(timemap_url), key=lambda m: m.datetime)
memento_urls = [m.url for m in mementos]
@@ -68,8 +81,7 @@ def bisect_urls(start_url, end_url, text=None, missing=False, show_browser=False
return bisect(start, end, memento_urls, text, missing, browser)


def bisect(start, end, memento_urls, text, missing, browser) -> str:

def bisect(start, end, memento_urls, text, missing, browser) -> str:
mid = start + int((end - start) / 2)
if mid == start:
return memento_urls[end]
@@ -85,7 +97,7 @@ def bisect(start, end, memento_urls, text, missing, browser) -> str:
text_in_page = False
# look in the page text
else:
print('\r' + meter(start, end, len(memento_urls)), end='')
print("\r" + meter(start, end, len(memento_urls)), end="")
text_in_page = text in page_text

# do we want to find the page where the text went missing?
Expand All @@ -111,4 +123,4 @@ def meter(start, end, n):
b = int((end - start + 1) * scale)
c = int((n - end + 1) * scale)

return f'[{n - (end - start)}/{n}]: ' + a * '█' + b * '░' + c * '█'
return f"[{n - (end - start)}/{n}]: " + a * "█" + b * "░" + c * "█"
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -19,6 +19,9 @@ pytest = "^7.4.2"
[tool.poetry.scripts]
memento = "memento_cli:main"

[tool.black]
include = ".py$"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
73 changes: 51 additions & 22 deletions test_memento_cli.py
@@ -5,23 +5,43 @@
get_mementos,
parse_links,
bisect,
bisect_urls
bisect_urls,
)

from memento_cli.browser import Browser


def test_get_timemap_url():
assert get_timemap_url('https://web.archive.org/web/20230621094005/https://help.twitter.com/en/rules-and-policies/hateful-conduct-policy') == 'https://web.archive.org/web/timemap/link/https://help.twitter.com/en/rules-and-policies/hateful-conduct-policy'
assert get_timemap_url('https://perma.cc/7CN8-NJNV') == 'https://perma.cc/timemap/html/http://arboretum.harvard.edu'
assert get_timemap_url('https://swap.stanford.edu/was/20230524140954/http://news.stanford.edu/') == 'https://swap.stanford.edu/was/timemap/link/http://news.stanford.edu/'
assert get_timemap_url('https://nytimes.com') is None
assert (
get_timemap_url(
"https://web.archive.org/web/20230621094005/https://help.twitter.com/en/rules-and-policies/hateful-conduct-policy"
)
== "https://web.archive.org/web/timemap/link/https://help.twitter.com/en/rules-and-policies/hateful-conduct-policy"
)
assert (
get_timemap_url("https://perma.cc/7CN8-NJNV")
== "https://perma.cc/timemap/html/http://arboretum.harvard.edu"
)
assert (
get_timemap_url(
"https://swap.stanford.edu/was/20230524140954/http://news.stanford.edu/"
)
== "https://swap.stanford.edu/was/timemap/link/http://news.stanford.edu/"
)
assert get_timemap_url("https://nytimes.com") is None


def test_get_mementos():
mementos = list(get_mementos('https://web.archive.org/web/timemap/link/https://help.twitter.com/en/rules-and-policies/hateful-conduct-policy'))
mementos = list(
get_mementos(
"https://web.archive.org/web/timemap/link/https://help.twitter.com/en/rules-and-policies/hateful-conduct-policy"
)
)
assert len(mementos) > 2000
assert mementos[0].url == 'https://web.archive.org/web/20171229054051/https://help.twitter.com/en/rules-and-policies/hateful-conduct-policy'
assert (
mementos[0].url
== "https://web.archive.org/web/20171229054051/https://help.twitter.com/en/rules-and-policies/hateful-conduct-policy"
)
assert mementos[0].datetime == datetime.datetime(2017, 12, 29, 5, 40, 51)
assert mementos[-1].datetime.year >= 2023

@@ -39,33 +59,42 @@ def test_parse_links():
"""
links = parse_links(text)
assert len(links) == 8
assert links[0]['rel'] == 'original'
assert links[0]['url'] == 'http://www.nytimes.com:80/'
assert links[7]['url'] == 'https://web.archive.org/web/19961219002950/http://www.nytimes.com:80/'
assert links[7]['rel'] == 'memento'
assert links[7]['datetime'] == 'Thu, 19 Dec 1996 00:29:50 GMT'
assert links[0]["rel"] == "original"
assert links[0]["url"] == "http://www.nytimes.com:80/"
assert (
links[7]["url"]
== "https://web.archive.org/web/19961219002950/http://www.nytimes.com:80/"
)
assert links[7]["rel"] == "memento"
assert links[7]["datetime"] == "Thu, 19 Dec 1996 00:29:50 GMT"


def test_bisect_urls():
start_url = 'http://web.archive.org/web/20200102102511/https://inkdroid.org/'
end_url = 'http://web.archive.org/web/20230902020134/https://inkdroid.org/'
url = bisect_urls(start_url, end_url, 'ReSpec Writing')
assert url == 'http://web.archive.org/web/20230601013229/https://inkdroid.org/'
start_url = "http://web.archive.org/web/20200102102511/https://inkdroid.org/"
end_url = "http://web.archive.org/web/20230902020134/https://inkdroid.org/"

url = bisect_urls(start_url, end_url, "ReSpec Writing")
assert url == "http://web.archive.org/web/20230601013229/https://inkdroid.org/"


def test_bisect():
timemap = get_timemap_url('http://web.archive.org/web/20230902020134/https://inkdroid.org/')
timemap = get_timemap_url(
"http://web.archive.org/web/20230902020134/https://inkdroid.org/"
)
mementos = sorted(get_mementos(timemap), key=lambda m: m.datetime)
mementos = [m.url for m in mementos]
browser = Browser(headless=True)

url = bisect(0, len(mementos), mementos, 'ReSpec Writing', missing=False, browser=browser)
assert url == 'http://web.archive.org/web/20230601013229/https://inkdroid.org/'
url = bisect(
0, len(mementos), mementos, "ReSpec Writing", missing=False, browser=browser
)
assert url == "http://web.archive.org/web/20230601013229/https://inkdroid.org/"


def test_browser():
browser = Browser(headless=True)
text = browser.get('https://swap.stanford.edu/was/20230524140954/https://library.stanford.edu/node/172367')
text = browser.get(
"https://swap.stanford.edu/was/20230524140954/https://library.stanford.edu/node/172367"
)
# This text appears in an iframe provided by pywb
assert 'East Asian telegraph codes' in text
assert "East Asian telegraph codes" in text
