From 1aba480495a43cd3ecff4871613a2ae5358c2c4f Mon Sep 17 00:00:00 2001 From: MingxuanGame Date: Wed, 25 May 2022 00:56:41 +0800 Subject: [PATCH 1/3] feat: stream downloading and progress --- poetry.lock | 64 +++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + utils/http_utils.py | 76 +++++++++++++++++++++++++++++++++------------ 3 files changed, 121 insertions(+), 20 deletions(-) diff --git a/poetry.lock b/poetry.lock index bb5c59ec1..80029927d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -325,6 +325,22 @@ type = "legacy" url = "https://mirrors.aliyun.com/pypi/simple" reference = "ali" +[[package]] +name = "commonmark" +version = "0.9.1" +description = "Python parser for the CommonMark Markdown spec" +category = "dev" +optional = false +python-versions = "*" + +[package.extras] +test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] + +[package.source] +type = "legacy" +url = "https://mirrors.aliyun.com/pypi/simple" +reference = "ali" + [[package]] name = "cycler" version = "0.11.0" @@ -988,6 +1004,19 @@ type = "legacy" url = "https://mirrors.aliyun.com/pypi/simple" reference = "ali" +[[package]] +name = "pygments" +version = "2.12.0" +description = "Pygments is a syntax highlighting package written in Python." 
+category = "dev" +optional = false +python-versions = ">=3.6" + +[package.source] +type = "legacy" +url = "https://mirrors.aliyun.com/pypi/simple" +reference = "ali" + [[package]] name = "pygtrie" version = "2.4.2" @@ -1208,6 +1237,27 @@ type = "legacy" url = "https://mirrors.aliyun.com/pypi/simple" reference = "ali" +[[package]] +name = "rich" +version = "12.4.3" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +category = "dev" +optional = false +python-versions = ">=3.6.3,<4.0.0" + +[package.dependencies] +commonmark = ">=0.9.0,<0.10.0" +pygments = ">=2.6.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] + +[package.source] +type = "legacy" +url = "https://mirrors.aliyun.com/pypi/simple" +reference = "ali" + [[package]] name = "rsa" version = "4.8" @@ -1620,7 +1670,7 @@ reference = "ali" [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "1890d2859d3ad815037187b8ec867636f4d3f5e2b13088c110747ace41358ca4" +content-hash = "c3c182bd82d45cbbc3518ea79ce8c280844a01f002bc908b61624e75d1799ece" [metadata.files] aiofiles = [ @@ -1790,6 +1840,10 @@ colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, ] +commonmark = [ + {file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"}, + {file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"}, +] cycler = [ {file = "cycler-0.11.0-py3-none-any.whl", hash = "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3"}, {file = "cycler-0.11.0.tar.gz", hash = 
"sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"}, @@ -2338,6 +2392,10 @@ pyee = [ {file = "pyee-8.1.0-py2.py3-none-any.whl", hash = "sha256:383973b63ad7ed5e3c0311f8b179c52981f9e7b3eaea0e9a830d13ec34dde65f"}, {file = "pyee-8.1.0.tar.gz", hash = "sha256:92dacc5bd2bdb8f95aa8dd2585d47ca1c4840e2adb95ccf90034d64f725bfd31"}, ] +pygments = [ + {file = "Pygments-2.12.0-py3-none-any.whl", hash = "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519"}, + {file = "Pygments-2.12.0.tar.gz", hash = "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb"}, +] pygtrie = [ {file = "pygtrie-2.4.2.tar.gz", hash = "sha256:43205559d28863358dbbf25045029f58e2ab357317a59b11f11ade278ac64692"}, ] @@ -2518,6 +2576,10 @@ rfc3986 = [ {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"}, {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"}, ] +rich = [ + {file = "rich-12.4.3-py3-none-any.whl", hash = "sha256:26ef784599a9ab905ade34ff28904e4fbe9bce16e02c33c78b0229551104c146"}, + {file = "rich-12.4.3.tar.gz", hash = "sha256:e7550ca19aec51b216ae4c34bfce82e94a0c79bdbf95cafbf42f343d0fb3f45a"}, +] rsa = [ {file = "rsa-4.8-py3-none-any.whl", hash = "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"}, {file = "rsa-4.8.tar.gz", hash = "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17"}, diff --git a/pyproject.toml b/pyproject.toml index d7900df9d..ab3c95054 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ emoji = "^1.7.0" wordcloud = "^1.8.1" [tool.poetry.dev-dependencies] +rich = "^12.4.3" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/utils/http_utils.py b/utils/http_utils.py index ce8bfee6b..aa4b97b37 100644 --- a/utils/http_utils.py +++ b/utils/http_utils.py @@ -14,6 +14,7 @@ import asyncio import aiofiles import httpx +import 
rich class AsyncHttpx: @@ -121,6 +122,7 @@ async def download_file( headers: Optional[Dict[str, str]] = None, cookies: Optional[Dict[str, str]] = None, timeout: Optional[int] = 30, + stream: bool = False, **kwargs, ) -> bool: """ @@ -135,31 +137,67 @@ async def download_file( :param headers: 请求头 :param cookies: cookies :param timeout: 超时时间 + :param stream: 是否使用流式下载(流式写入+进度条,适用于下载大文件) """ if isinstance(path, str): path = Path(path) path.parent.mkdir(parents=True, exist_ok=True) try: for _ in range(3): - try: - content = ( - await cls.get( - url, - params=params, - headers=headers, - cookies=cookies, - use_proxy=use_proxy, - proxy=proxy, - timeout=timeout, - **kwargs, - ) - ).content - async with aiofiles.open(path, "wb") as wf: - await wf.write(content) - logger.info(f"下载 {url} 成功.. Path:{path.absolute()}") - return True - except (TimeoutError, ConnectTimeout): - pass + if not stream: + try: + content = ( + await cls.get( + url, + params=params, + headers=headers, + cookies=cookies, + use_proxy=use_proxy, + proxy=proxy, + timeout=timeout, + **kwargs, + ) + ).content + async with aiofiles.open(path, "wb") as wf: + await wf.write(content) + logger.info(f"下载 {url} 成功.. Path:{path.absolute()}") + return True + except (TimeoutError, ConnectTimeout): + pass + else: + if not headers: + headers = get_user_agent() + proxy = proxy if proxy else cls.proxy if use_proxy else None + try: + async with httpx.AsyncClient(proxies=proxy) as client: + async with client.stream( + "GET", + url, + params=params, + headers=headers, + cookies=cookies, + timeout=timeout, + **kwargs + ) as response: + logger.info(f"开始下载 {path.name}.. 
Path: {path.absolute()}") + async with aiofiles.open(path, "wb") as wf: + total = int(response.headers["Content-Length"]) + with rich.progress.Progress( + rich.progress.TextColumn(path.name), + "[progress.percentage]{task.percentage:>3.0f}%", + rich.progress.BarColumn(bar_width=None), + rich.progress.DownloadColumn(), + rich.progress.TransferSpeedColumn() + ) as progress: + download_task = progress.add_task("Download", total=total) + async for chunk in response.aiter_bytes(): + await wf.write(chunk) + await wf.flush() + progress.update(download_task, completed=response.num_bytes_downloaded) + logger.info(f"下载 {url} 成功.. Path:{path.absolute()}") + return True + except (TimeoutError, ConnectTimeout): + pass else: logger.error(f"下载 {url} 下载超时.. Path:{path.absolute()}") except Exception as e: From d816e7696214366faa9a4c50d3e02b9064648b0c Mon Sep 17 00:00:00 2001 From: MingxuanGame Date: Wed, 25 May 2022 01:13:27 +0800 Subject: [PATCH 2/3] fix: move rich from dev dependencies to main dependencies --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ab3c95054..db6d99c10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,9 +42,9 @@ python-multipart = "^0.0.5" bilireq = "^0.1.2" emoji = "^1.7.0" wordcloud = "^1.8.1" +rich = "^12.4.3" [tool.poetry.dev-dependencies] -rich = "^12.4.3" [build-system] requires = ["poetry-core>=1.0.0"] From 96837fa8dbba50a08e33f5f856f5d7c5e95b4bec Mon Sep 17 00:00:00 2001 From: MingxuanGame Date: Wed, 25 May 2022 01:19:12 +0800 Subject: [PATCH 3/3] fix: move rich from dev dependencies to main dependencies --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 80029927d..94445a62e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -329,7 +329,7 @@ reference = "ali" name = "commonmark" version = "0.9.1" description = "Python parser for the CommonMark Markdown spec" -category = "dev" +category = "main" optional = false python-versions = 
"*" @@ -1008,7 +1008,7 @@ reference = "ali" name = "pygments" version = "2.12.0" description = "Pygments is a syntax highlighting package written in Python." -category = "dev" +category = "main" optional = false python-versions = ">=3.6" @@ -1241,7 +1241,7 @@ reference = "ali" name = "rich" version = "12.4.3" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "dev" +category = "main" optional = false python-versions = ">=3.6.3,<4.0.0"