-
Notifications
You must be signed in to change notification settings - Fork 5
/
mitene_download.py
161 lines (136 loc) · 4.81 KB
/
mitene_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""Download medias from https://mitene.us/ or https://family-album.com/"""
# Version string of this script.
__version__ = "0.3.0"
import argparse
import asyncio
import datetime
import glob
import json
import mimetypes
import os
import pathlib
import platform
import sys
import urllib.parse
from typing import Awaitable
import aiohttp
async def gather_with_concurrency(n: int, *tasks: Awaitable[None]) -> None:
    """Await all ``tasks`` like ``asyncio.gather``, with at most ``n`` running at once.

    Args:
        n: Maximum number of tasks allowed to run concurrently; must be >= 1.
        *tasks: Awaitables to run. Their results are not returned.

    Raises:
        ValueError: If ``n`` is smaller than 1. A limit of 0 would create a
            semaphore that can never be acquired, so every task would wait
            forever.
    """
    if n < 1:
        raise ValueError(f"Concurrency limit must be at least 1, got {n}")

    semaphore = asyncio.Semaphore(n)

    async def sem_task(task: Awaitable[None]) -> None:
        # The wrapped awaitable is only awaited once a semaphore slot is free.
        async with semaphore:
            await task

    await asyncio.gather(*(sem_task(task) for task in tasks))
async def download_media(
    session: aiohttp.ClientSession,
    url: str,
    destination_filename: str,
    media_name: str,
    verbose: bool,
) -> None:
    """Download one media from URL unless it already exists on disk.

    The body is streamed to ``destination_filename + ".tmp"`` and moved to
    ``destination_filename`` only once the download completed, so a file at
    the final path is always a complete download.

    Args:
        session: aiohttp session used to issue the GET request.
        url: Download URL of the media.
        destination_filename: Final path of the downloaded file.
        media_name: Name used in progress messages.
        verbose: When true, print progress messages to stdout.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an error
            status (raised by ``raise_for_status``).
    """
    if os.path.exists(destination_filename):
        if verbose:
            print(f"{media_name} already downloaded ✔️", flush=True)
        return

    if verbose:
        print(f"Downloading {media_name} ⏳", flush=True)

    temporary_filename = destination_filename + ".tmp"
    try:
        # ``async with`` releases the connection back to the session even if
        # the status check or the streaming fails.
        async with session.get(url) as response:
            response.raise_for_status()
            # Open the file only after the status check so a failed request
            # does not create an empty temporary file.
            with open(temporary_filename, "wb") as f:
                async for chunk in response.content.iter_chunked(1024):
                    f.write(chunk)
    except BaseException:
        # Also covers task cancellation: never leave a partial download behind.
        try:
            os.unlink(temporary_filename)
        except FileNotFoundError:
            pass
        raise

    # os.replace overwrites an existing destination on every platform.
    os.replace(temporary_filename, destination_filename)
def _media_filename(media: dict) -> str:
    """Return the local file name for one entry of the album's ``mediaFiles``.

    The name is the entry's ``tookAt`` value, a dash, and the last path
    segment of its video URL (or of its regular URL when it has no video URL).
    """
    remote_name = urllib.parse.urlparse(
        media.get("expiringVideoUrl", media["expiringUrl"])
    ).path.split("/")[-1]
    filename = f'{media["tookAt"]}-{remote_name}'
    if platform.system() == "Windows":
        # Presumably "tookAt" is a timestamp containing ":", which is not a
        # valid character in Windows file names.
        filename = filename.replace(":", "")
    if not os.path.splitext(filename)[1]:
        # guess_extension() returns None for unknown content types; fall back
        # to no extension instead of failing on ``str + None``.
        filename += mimetypes.guess_extension(media["contentType"]) or ""
    return filename


def _write_comments(media: dict, destination_filename: str) -> None:
    """Write the non-deleted comments of ``media`` to a ``.md`` file next to it.

    The file is only (re)written when it is missing or its content differs.
    """
    comment_text = "".join(
        f'**{comment["user"]["nickname"]}**: {comment["body"]}\n\n'
        for comment in media["comments"]
        if not comment["isDeleted"]
    )
    comment_file = pathlib.Path(os.path.splitext(destination_filename)[0] + ".md")
    up_to_date = (
        comment_file.exists() and comment_file.read_text(encoding="utf-8") == comment_text
    )
    if not up_to_date:
        comment_file.write_text(comment_text, encoding="utf-8")


async def async_main() -> None:
    """Parse the command line and download every media file of the album.

    Walks the album page by page (logging in first when the album is password
    protected), writes the comments of each media to a ``.md`` file, then
    downloads all media with at most 4 concurrent downloads.

    Exits the process with status 1 when the album needs a password that was
    not given, when authentication fails, or when an album page cannot be
    parsed.
    """
    parser = argparse.ArgumentParser(prog="mitene_download", description=__doc__)
    parser.add_argument(
        "album_url",
        help="""
        URL of the album.
        This is the URL obtained by inviting a family member for the web version.
        """,
    )
    parser.add_argument("--destination-directory", default="out")
    parser.add_argument("-p", "--password")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    os.makedirs(args.destination_directory, exist_ok=True)

    # cleanup temp files from previous run, if interrupted
    for tmp_file in glob.glob(os.path.join(args.destination_directory, "*.tmp")):
        os.unlink(tmp_file)

    # (url, destination_filename, media_name) for every media to download.
    # Coroutines are only created right before they are awaited, so an early
    # exit does not leave never-awaited coroutine objects behind.
    downloads = []

    async with aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=datetime.timedelta(minutes=30).total_seconds())
    ) as session:
        page = 1
        login_attempted = False
        while True:
            async with session.get(f"{args.album_url}?page={page}") as r:
                response_text = await r.text()

            if page == 1 and "Please enter your password" in response_text:
                if not args.password:
                    print(
                        "Album is password protected, please specify password with --password",
                        file=sys.stderr,
                    )
                    sys.exit(1)
                if login_attempted:
                    # Still prompted after logging in: stop instead of
                    # submitting the password in an endless loop.
                    print("Could not authenticate, maybe password is incorrect", file=sys.stderr)
                    sys.exit(1)

                _, found, tail = response_text.partition('name="authenticity_token" value="')
                authenticity_token = tail.split('"')[0] if found else ""
                if not authenticity_token:
                    print("Could not parse authenticity token", file=sys.stderr)
                    sys.exit(1)

                async with session.post(
                    f"{args.album_url}/login",
                    data={
                        "session[password]": args.password,
                        "authenticity_token": authenticity_token,
                    },
                ) as login_response:
                    # Ending up on the login page again means the login failed.
                    login_failed = login_response.url.path.endswith("/login")
                if login_failed:
                    print("Could not authenticate, maybe password is incorrect", file=sys.stderr)
                    sys.exit(1)
                login_attempted = True
                # Fetch page 1 again, now authenticated.
                continue

            # The media list is embedded in the page as a JavaScript assignment.
            _, found, tail = response_text.partition("//<![CDATA[\nwindow.gon={};gon.media=")
            if not found:
                print(
                    "Could not find media data in the album page, please check the album URL",
                    file=sys.stderr,
                )
                sys.exit(1)
            data = json.loads(tail.split(";gon.familyUserIdToColorMap=")[0])

            page += 1
            if not data["mediaFiles"]:
                # An empty page marks the end of the album.
                break

            for media in data["mediaFiles"]:
                destination_filename = os.path.join(
                    args.destination_directory,
                    _media_filename(media),
                )
                downloads.append(
                    (
                        f"{args.album_url}/media_files/{media['uuid']}/download",
                        destination_filename,
                        media["uuid"],
                    )
                )
                if media["comments"]:
                    _write_comments(media, destination_filename)

        # Must run inside the ``async with`` block: downloads use the session,
        # which is closed automatically when the block exits.
        await gather_with_concurrency(
            4,
            *(
                download_media(session, url, destination_filename, media_name, args.verbose)
                for url, destination_filename, media_name in downloads
            ),
        )
def main() -> None:
    """Synchronous entry point: run :func:`async_main` to completion."""
    # asyncio.run() creates and closes its own event loop. Calling
    # asyncio.get_event_loop() without a running loop is deprecated and
    # raises RuntimeError on recent Python versions.
    asyncio.run(async_main())


# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()