diff --git a/poetry.lock b/poetry.lock index f7d096f9b..86ec46367 100644 --- a/poetry.lock +++ b/poetry.lock @@ -235,6 +235,22 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "commonmark" +version = "0.9.1" +description = "Python parser for the CommonMark Markdown spec" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] + +[package.source] +type = "legacy" +url = "https://mirrors.aliyun.com/pypi/simple" +reference = "ali" + [[package]] name = "cycler" version = "0.11.0" @@ -713,6 +729,19 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "pygments" +version = "2.12.0" +description = "Pygments is a syntax highlighting package written in Python." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.source] +type = "legacy" +url = "https://mirrors.aliyun.com/pypi/simple" +reference = "ali" + [[package]] name = "pygtrie" version = "2.4.2" @@ -863,6 +892,27 @@ idna = {version = "*", optional = true, markers = "extra == \"idna2008\""} [package.extras] idna2008 = ["idna"] +[[package]] +name = "rich" +version = "12.4.3" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +category = "main" +optional = false +python-versions = ">=3.6.3,<4.0.0" + +[package.dependencies] +commonmark = ">=0.9.0,<0.10.0" +pygments = ">=2.6.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] + +[package.source] +type = "legacy" +url = "https://mirrors.aliyun.com/pypi/simple" +reference = "ali" + [[package]] name = "rsa" version = "4.8" @@ -1150,7 +1200,7 @@ python-versions = "*" [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "89e873649d58d1c9df3f4bce93b12b681f80ea0d7f9ae96a9fae9feda87c825d" +content-hash = "c3c182bd82d45cbbc3518ea79ce8c280844a01f002bc908b61624e75d1799ece" [metadata.files] aiofiles = [ @@ -1320,6 +1370,10 @@ colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, ] +commonmark = [ + {file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"}, + {file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"}, +] cycler = [ {file = "cycler-0.11.0-py3-none-any.whl", hash = "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3"}, {file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"}, @@ -1879,6 +1933,10 @@ pyee = [ {file = "pyee-8.1.0-py2.py3-none-any.whl", hash = "sha256:383973b63ad7ed5e3c0311f8b179c52981f9e7b3eaea0e9a830d13ec34dde65f"}, {file = "pyee-8.1.0.tar.gz", hash = "sha256:92dacc5bd2bdb8f95aa8dd2585d47ca1c4840e2adb95ccf90034d64f725bfd31"}, ] +pygments = [ + {file = "Pygments-2.12.0-py3-none-any.whl", hash = "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519"}, + {file = "Pygments-2.12.0.tar.gz", hash = "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb"}, +] pygtrie = [ {file = "pygtrie-2.4.2.tar.gz", hash = "sha256:43205559d28863358dbbf25045029f58e2ab357317a59b11f11ade278ac64692"}, ] @@ -2059,6 +2117,10 @@ rfc3986 = [ {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"}, {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"}, ] +rich = [ + {file = "rich-12.4.3-py3-none-any.whl", hash = "sha256:26ef784599a9ab905ade34ff28904e4fbe9bce16e02c33c78b0229551104c146"}, + {file = "rich-12.4.3.tar.gz", hash = "sha256:e7550ca19aec51b216ae4c34bfce82e94a0c79bdbf95cafbf42f343d0fb3f45a"}, +] rsa = [ {file = "rsa-4.8-py3-none-any.whl", hash = "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"}, {file = "rsa-4.8.tar.gz", hash = "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17"}, diff --git a/pyproject.toml b/pyproject.toml index d7900df9d..db6d99c10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ python-multipart = "^0.0.5" bilireq = "^0.1.2" emoji = "^1.7.0" wordcloud = "^1.8.1" +rich = "^12.4.3" [tool.poetry.dev-dependencies] diff --git a/utils/http_utils.py b/utils/http_utils.py index ce8bfee6b..aa4b97b37 100644 --- a/utils/http_utils.py +++ b/utils/http_utils.py @@ -14,6 +14,7 @@ import asyncio import aiofiles import httpx +import rich class AsyncHttpx: @@ -121,6 +122,7 @@ async def download_file( headers: Optional[Dict[str, str]] = None, cookies: Optional[Dict[str, str]] = None, timeout: Optional[int] = 30, + stream: bool = False, **kwargs, ) -> bool: """ @@ -135,31 +137,67 @@ async def download_file( :param headers: 请求头 :param cookies: cookies :param timeout: 超时时间 + :param stream: 是否使用流式下载(流式写入+进度条,适用于下载大文件) """ if isinstance(path, str): path = Path(path) path.parent.mkdir(parents=True, exist_ok=True) try: for _ in range(3): - try: - content = ( - await cls.get( - url, - params=params, - headers=headers, - cookies=cookies, - use_proxy=use_proxy, - proxy=proxy, - timeout=timeout, - **kwargs, - ) - ).content - async with aiofiles.open(path, "wb") as wf: - await wf.write(content) - logger.info(f"下载 {url} 成功.. Path:{path.absolute()}") - return True - except (TimeoutError, ConnectTimeout): - pass + if not stream: + try: + content = ( + await cls.get( + url, + params=params, + headers=headers, + cookies=cookies, + use_proxy=use_proxy, + proxy=proxy, + timeout=timeout, + **kwargs, + ) + ).content + async with aiofiles.open(path, "wb") as wf: + await wf.write(content) + logger.info(f"下载 {url} 成功.. Path:{path.absolute()}") + return True + except (TimeoutError, ConnectTimeout): + pass + else: + if not headers: + headers = get_user_agent() + proxy = proxy if proxy else cls.proxy if use_proxy else None + try: + async with httpx.AsyncClient(proxies=proxy) as client: + async with client.stream( + "GET", + url, + params=params, + headers=headers, + cookies=cookies, + timeout=timeout, + **kwargs + ) as response: + logger.info(f"开始下载 {path.name}.. Path: {path.absolute()}") + async with aiofiles.open(path, "wb") as wf: + total = int(response.headers["Content-Length"]) + with rich.progress.Progress( + rich.progress.TextColumn(path.name), + "[progress.percentage]{task.percentage:>3.0f}%", + rich.progress.BarColumn(bar_width=None), + rich.progress.DownloadColumn(), + rich.progress.TransferSpeedColumn() + ) as progress: + download_task = progress.add_task("Download", total=total) + async for chunk in response.aiter_bytes(): + await wf.write(chunk) + await wf.flush() + progress.update(download_task, completed=response.num_bytes_downloaded) + logger.info(f"下载 {url} 成功.. Path:{path.absolute()}") + return True + except (TimeoutError, ConnectTimeout): + pass else: logger.error(f"下载 {url} 下载超时.. Path:{path.absolute()}") except Exception as e: