-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
91 lines (77 loc) · 2.76 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import time
import pandas as pd
import argparse
from pytubefix import YouTube, Channel
from src.lingq import *
from src.youtube import get_youtube_data
# Command-line interface: one or more channel names, plus an optional
# integer `-n` (default 50). Parsing happens at module level; the
# resulting `args` namespace is read by the script entry point below.
parser = argparse.ArgumentParser(prog="LingQ-Importer")
parser.add_argument(
    "-n",
    dest="n",
    type=int,
    default=50,
)
parser.add_argument(
    "channel_names",
    nargs="+",
    help="YouTube channel names",
)
args = parser.parse_args()
def run(
    channel_name="JorgeDeLeonMx",
    language_code="es",
    n_videos=50,
    manual_transcripts_only=True,
):
    """Import videos from a YouTube channel into a LingQ course.

    The course is named ``"<channel_name> YouTube"``. If no course with that
    title exists for ``language_code`` it is created; otherwise the first
    course with a matching title is reused. Videos whose title already
    appears among the course's lessons are skipped.

    Args:
        channel_name: YouTube handle, without the leading ``@``.
        language_code: Language code used for every LingQ call (course
            lookup, course creation, lesson lookup and lesson import) and
            forwarded to ``get_youtube_data``.
        n_videos: Forwarded to ``get_youtube_data`` as ``n_videos``.
        manual_transcripts_only: Forwarded to ``get_youtube_data``.

    Returns:
        None. Progress is reported with ``print``.
    """
    course_title = f"{channel_name} YouTube"
    channel = Channel(f"https://www.youtube.com/@{channel_name}/videos")
    courses = get_courses_by_language(language_code=language_code)["results"]

    # Single pass over the courses: first course whose title matches, if any.
    existing_course = next(
        (course for course in courses if course["title"] == course_title),
        None,
    )

    if existing_course is None:
        print(f"Creating New LingQ Course: `{course_title}`")
        # NOTE(review): the replace() strips *every* newline, including the
        # "\n\n" separator inserted just before the channel description.
        # Kept as-is; confirm whether a blank line was actually intended.
        description = (
            f"[YouTube Transcripts and Audio taken from @{channel_name}] "
            f"\n\n{channel.description}"
        ).replace("\n", "")
        new_collection = create_collection(
            title=course_title,
            description=description,
            language_code=language_code,
            # sourceURL=channel.channel_url,  # left disabled, as in the original
        )
        collection_id = new_collection["id"]
    else:
        print(f"Using Existing LingQ Course: `{course_title}`")
        collection_id = existing_course["id"]

    # Use the caller's language here (previously hard-coded to "es", which
    # broke lesson lookup for any other language).
    existing_lessons = get_lessons_by_course(
        language_code=language_code, course_id=collection_id
    )["lessons"]
    existing_titles = [lesson["title"] for lesson in existing_lessons]

    records = get_youtube_data(
        channel_name=channel_name,
        language_code=language_code,
        n_videos=n_videos,
        ignore_titles=existing_titles,
        manual_transcripts_only=manual_transcripts_only,
    )

    for record in records:
        # Defensive re-check even though the titles were also passed to
        # get_youtube_data as ignore_titles.
        if record["title"] in existing_titles:
            continue
        # NOTE(review): `requests` is not imported explicitly in this file;
        # presumably it arrives via `from src.lingq import *` - confirm.
        response_json = import_lesson(
            title=record["title"],
            description=record["description"],
            collection_id=collection_id,
            url=record["url"],
            language_code=language_code,
            level=5,
            extra={"ytpage": requests.get(record["url"]).text, "ytdebug": False},
        )
        print(f"Uploaded Video `{response_json['title']}`")
        # Pause between uploads.
        time.sleep(1)
if __name__ == "__main__":
    # Channels are taken from the command line. Example handles (from an
    # earlier hard-coded list): JorgeDeLeonMx, CasoCerrado, EasySpanish,
    # 31minutos, noticias, DreamingSpanish, CoreanoVlogs.
    for handle in args.channel_names:
        run(
            channel_name=handle,
            language_code="es",
            n_videos=args.n,
            manual_transcripts_only=False,
        )