From acd81fc336679dd8a41bb13f12c66b5bc7f1c387 Mon Sep 17 00:00:00 2001
From: fanchengyan
Date: Mon, 4 Dec 2023 13:44:25 +0800
Subject: [PATCH] format using black

---
 data_downloader/downloader.py |  20 +++---
 data_downloader/parse_urls.py | 104 +++++++++++++++++++---------
 setup.py                      |   2 +-
 3 files changed, 69 insertions(+), 57 deletions(-)

diff --git a/data_downloader/downloader.py b/data_downloader/downloader.py
index 91bb87f..1610788 100755
--- a/data_downloader/downloader.py
+++ b/data_downloader/downloader.py
@@ -1,19 +1,19 @@
-import os
-import time
+import asyncio
 import datetime as dt
+import multiprocessing as mp
+import os
 import selectors
-import asyncio
+import time
+from netrc import netrc
+from pathlib import Path
+from urllib.parse import urlparse
+
+import browser_cookie3 as bc
 import httpx
-import requests
 import nest_asyncio
-import browser_cookie3 as bc
+import requests
 from dateutil.parser import parse
-from netrc import netrc
-import multiprocessing as mp
-from urllib.parse import urlparse
 from tqdm.auto import tqdm
-from pathlib import Path
-
 
 nest_asyncio.apply()
 
diff --git a/data_downloader/parse_urls.py b/data_downloader/parse_urls.py
index 9fe1090..e0779ac 100644
--- a/data_downloader/parse_urls.py
+++ b/data_downloader/parse_urls.py
@@ -1,30 +1,32 @@
-from data_downloader.downloader import get_netrc_auth, get_url_host
-from xml.dom.minidom import parse
-from urllib.parse import urljoin
 from pathlib import Path
+from urllib.parse import urljoin
+from xml.dom.minidom import parse
+
 import httpx
 from bs4 import BeautifulSoup
 
+from data_downloader.downloader import get_netrc_auth, get_url_host
+
 
 def from_urls_file(url_file):
-    '''parse urls from a file which only contains urls
+    """parse urls from a file that contains only urls
 
     Parameters:
     -----------
     url_file: str
-        path to file which only contains urls
+        path to a file that contains only urls
 
     Return:
     -------
     a list contains urls
-    '''
+    """
     with open(url_file) as f:
         urls = [i.strip() for i in f.readlines()]
     return urls
 
 
 def from_sentinel_meta4(url_file):
-    '''parse urls from sentinel `products.meta4` file downloaded from
+    """parse urls from a sentinel `products.meta4` file downloaded from
     https://scihub.copernicus.eu/dhus
 
     Parameters:
@@ -35,24 +37,23 @@ def from_sentinel_meta4(url_file):
     Return:
     -------
     a list contains urls
-    '''
+    """
     data = parse(url_file).documentElement
-    urls = [i.childNodes[0].nodeValue for i in
-            data.getElementsByTagName('url')]
+    urls = [i.childNodes[0].nodeValue for i in data.getElementsByTagName("url")]
     return urls
 
 
 def from_html(url, suffix=None, suffix_depth=0, url_depth=0):
-    '''parse urls from html website
+    """parse urls from an html website
 
     Parameters:
     -----------
     url: str
         the website contatins data
     suffix: list, optional
-        data format. suffix should be a list contains multipart. 
-        if suffix_depth is 0, all '.' will parsed. 
-        Examples:
+        data format. suffix should be a list that may contain multiple parts.
+        if suffix_depth is 0, all '.' parts will be parsed.
+        Examples:
             when set 'suffix_depth=0':
                 suffix of 'xxx8.1_GLOBAL.nc' should be ['.1_GLOBAL', '.nc']
                 suffix of 'xxx.tar.gz' should be ['.tar', '.gz']
@@ -76,7 +77,8 @@ def from_html(url, suffix=None, suffix_depth=0, url_depth=0):
     >>> urls = parse_urls.from_html(url, suffix=['.nc'], suffix_depth=1)
     >>> urls_all = parse_urls.from_html(url, suffix=['.nc'], suffix_depth=1, url_depth=1)
     >>> print(len(urls_all)-len(urls))
-    '''
+    """
+
     def match_suffix(href, suffix):
         if suffix:
             sf = Path(href).suffixes[-suffix_depth:]
@@ -85,17 +87,19 @@ def match_suffix(href, suffix):
         return True
 
     r_h = httpx.head(url)
-    if 'text/html' in r_h.headers['Content-Type']:
+    if "text/html" in r_h.headers["Content-Type"]:
         r = httpx.get(url)
-        soup = BeautifulSoup(r.text, 'html.parser')
+        soup = BeautifulSoup(r.text, "html.parser")
 
-        a = soup.find_all('a')
-        urls_all = [urljoin(url, i['href']) for i in a if i.has_key('href')]
+        a = soup.find_all("a")
+        urls_all = [urljoin(url, i["href"]) for i in a if i.has_attr("href")]
         urls = [i for i in urls_all if match_suffix(i, suffix)]
     if url_depth > 0:
-        urls_notdata = sorted(set(urls_all)-set(urls))
-        urls_depth = [from_html(_url, suffix, suffix_depth, url_depth - 1)
-                      for _url in urls_notdata]
+        urls_notdata = sorted(set(urls_all) - set(urls))
+        urls_depth = [
+            from_html(_url, suffix, suffix_depth, url_depth - 1)
+            for _url in urls_notdata
+        ]
 
         for u in urls_depth:
             if isinstance(u, list):
@@ -105,8 +109,8 @@ def match_suffix(href, suffix):
 
 
 def _retrieve_all_orders(url_host, email, auth):
-    filters = {'status': 'complete'}
-    url = urljoin(url_host, f'/api/v1/list-orders/{email}')
+    filters = {"status": "complete"}
+    url = urljoin(url_host, f"/api/v1/list-orders/{email}")
     r = httpx.get(url, params=filters, auth=auth)
     r.raise_for_status()
     all_orders = r.json()
@@ -115,29 +119,33 @@ def _retrieve_all_orders(url_host, email, auth):
 
 
 def _retrieve_urls_from_order(url_host, orderid, auth):
-    filters = {'status': 'complete'}
-    url = urljoin(url_host, f'/api/v1/item-status/{orderid}')
+    filters = {"status": "complete"}
+    url = urljoin(url_host, f"/api/v1/item-status/{orderid}")
     r = httpx.get(url, params=filters, auth=auth)
     r.raise_for_status()
     urls_info = r.json()
 
     if isinstance(urls_info, dict):
-        messages = urls_info.pop('messages', dict())
-        if messages.get('errors'):
-            raise Exception('{}'.format(messages.get('errors')))
-        if messages.get('warnings'):
-            print('>>> Warning: {}'.format(messages.get('warnings')))
+        messages = urls_info.pop("messages", dict())
+        if messages.get("errors"):
+            raise Exception("{}".format(messages.get("errors")))
+        if messages.get("warnings"):
+            print(">>> Warning: {}".format(messages.get("warnings")))
 
     if orderid not in urls_info:
-        raise ValueError(f'Order ID{orderid} not found')
-    urls = [i.get('product_dload_url') for i in urls_info[orderid]
-            if i.get('product_dload_url') != '']
+        raise ValueError(f"Order ID {orderid} not found")
+    urls = [
+        i.get("product_dload_url")
+        for i in urls_info[orderid]
+        if i.get("product_dload_url") != ""
+    ]
     return urls
 
 
-def from_EarthExplorer_order(username=None, passwd=None, email=None,
-                             order=None, url_host=None):
-    '''parse urls from orders in earthexplorer. 
+def from_EarthExplorer_order(
+    username=None, passwd=None, email=None, order=None, url_host=None
+):
+    """parse urls from orders in EarthExplorer.
 
     Reference: [bulk-downloader](https://code.usgs.gov/espa/bulk-downloader)
 
@@ -149,7 +157,7 @@ def from_EarthExplorer_order(username=None, passwd=None, email=None,
     email: str, optional
         email address for the user that submitted the order
     order: str or dict
-        which order to download. If None, all orders retrieved from 
+        which order to download. If None, all orders retrieved from
         EarthExplorer will be used.
     url_host: str
         if host is not USGS ESPA
@@ -171,17 +179,19 @@ def from_EarthExplorer_order(username=None, passwd=None, email=None,
     >>> folder.mkdir()
     >>> urls = urls_info[odr]
     >>> downloader.download_datas(urls, folder)
-    '''
+    """
     # init parameters
-    email = email if email else ''
+    email = email if email else ""
     if url_host is None:
-        url_host = 'https://espa.cr.usgs.gov'
+        url_host = "https://espa.cr.usgs.gov"
     host = get_url_host(url_host)
     auth = get_netrc_auth(host)
 
     if (auth == username) or (auth == passwd):
-        raise ValueError('username and passwd neither be found in netrc or'
-                         ' be assigned in parameter')
+        raise ValueError(
+            "username and passwd were neither found in netrc"
+            " nor given as parameters"
+        )
     elif not auth:
         auth = (username, passwd)
 
@@ -195,7 +205,7 @@ def from_EarthExplorer_order(username=None, passwd=None, email=None,
         try:
            orders = list(order)
        except:
-            raise ValueError('order must be str or list of str')
+            raise ValueError("order must be a str or a list of str")
 
     urls_info = {}
     for odr in orders:
@@ -203,6 +213,8 @@ def from_EarthExplorer_order(username=None, passwd=None, email=None,
         if urls:
             urls_info.update({odr: urls})
         else:
-            print(f'>>> Warning: Data for order id {odr} have expired.'
-                  ' Please reorder it again if you want to use it anymore')
+            print(
+                f">>> Warning: Data for order id {odr} has expired."
+                " Please reorder it if you still need it."
+            )
     return urls_info
diff --git a/setup.py b/setup.py
index 86552ce..bf43fd0 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="data-downloader",
-    version="0.5.0",
+    version="0.5.1",
     author="fanchegyan",
     author_email="fanchy14@lzu.edu.cn",
     description="Make downloading scientific data much easier",
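
Usage note: a minimal sketch of how the two modules touched by this patch fit
together, pieced from the doctest examples visible in the hunks above. The
`from data_downloader import ...` form and the listing URL are illustrative
assumptions, not part of the diff.

    # Minimal usage sketch, assuming the package layout shown in this patch.
    from pathlib import Path

    from data_downloader import downloader, parse_urls

    # Collect links ending in ".nc" from an HTML directory listing,
    # following non-data links one level deep (url_depth=1).
    url = "https://example.com/data/"  # hypothetical listing page
    urls = parse_urls.from_html(url, suffix=[".nc"], suffix_depth=1, url_depth=1)

    # Download everything into ./data, as in the docstring example above.
    folder = Path("data")
    folder.mkdir(exist_ok=True)
    downloader.download_datas(urls, folder)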