Skip to content

Commit

Permalink
Adding YoutubeShort result to youtube url parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
kat-kel authored Feb 29, 2024
1 parent 9ad3ce9 commit eb152c4
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 8 deletions.
28 changes: 22 additions & 6 deletions test/youtube_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
# Ural Youtube Unit Tests
# =============================================================================
from ural.youtube import (
YoutubeChannel,
YoutubeShort,
YoutubeUser,
YoutubeVideo,
extract_video_id_from_youtube_url,
is_youtube_channel_id,
is_youtube_url,
is_youtube_video_id,
is_youtube_channel_id,
parse_youtube_url,
extract_video_id_from_youtube_url,
normalize_youtube_url,
YoutubeVideo,
YoutubeUser,
YoutubeChannel,
parse_youtube_url,
)

IS_TESTS = [
Expand Down Expand Up @@ -232,6 +233,21 @@
YoutubeChannel(id=None, name="28minutesARTE"),
"https://www.youtube.com/28minutesARTE",
),
(
"https://www.youtube.com/shorts/xnh-JKqktAU",
YoutubeShort(id="xnh-JKqktAU"),
"https://www.youtube.com/shorts/xnh-JKqktAU",
),
(
"https://www.youtube.com/shorts/U5Bn8mMxj4o/nonsense?whatever",
YoutubeShort(id="U5Bn8mMxj4o"),
"https://www.youtube.com/shorts/U5Bn8mMxj4o",
),
(
"https://www.youtube.com/shorts/",
None,
"https://www.youtube.com/shorts/",
),
]


Expand Down
24 changes: 23 additions & 1 deletion ural/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@
# but there is no way to infer this...
YOUTUBE_CHANNEL_NAME_URL_TEMPLATE = "https://www.youtube.com/%s"

# Canonical URL of a YouTube Short; `%s` is filled with the short's video id
# when a parsed `YoutubeShort` is normalized.
YOUTUBE_SHORT_URL_TEMPLATE = "https://www.youtube.com/shorts/%s"

YOUTUBE_CHANNEL_NAME_BLACKLIST = {
"about",
"account",
Expand All @@ -209,7 +211,7 @@
# Result records produced by `parse_youtube_url`. They are plain namedtuples,
# so they compare and unpack like ordinary tuples.
YoutubeVideo = namedtuple("YoutubeVideo", ("id", "playlist"))
YoutubeUser = namedtuple("YoutubeUser", ("id", "name"))
YoutubeChannel = namedtuple("YoutubeChannel", ("id", "name"))

# A YouTube Short carries nothing but its video id.
YoutubeShort = namedtuple("YoutubeShort", ("id",))

# NOTE: we use a trie to perform efficient queries and so we don't
# need to test every domain/subdomain linearly
Expand Down Expand Up @@ -369,6 +371,22 @@ def parse_youtube_url(url, fix_common_mistakes=True):

return YoutubeChannel(id=cid, name=None)

elif path.startswith("/shorts/"):
splitted_path = pathsplit(path)

if len(splitted_path) < 2:
return None

v = splitted_path[1]

if fix_common_mistakes:
v = v[:11]

if not is_youtube_video_id(v):
return

return YoutubeShort(id=v)

else:
path = path.rstrip("/")
if path.count("/") == 1:
Expand Down Expand Up @@ -414,4 +432,8 @@ def normalize_youtube_url(url):

return YOUTUBE_CHANNEL_NAME_URL_TEMPLATE % parsed.name

if isinstance(parsed, YoutubeShort):
if parsed.id is not None:
return YOUTUBE_SHORT_URL_TEMPLATE % parsed.id

raise TypeError("normalize_youtube_url: impossible path reached")
5 changes: 4 additions & 1 deletion ural/youtube.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@ class YoutubeChannel(NamedTuple):
id: str
name: str

# Typed view of the `YoutubeShort` result tuple returned for "/shorts/<id>"
# URLs.
class YoutubeShort(NamedTuple):
    # Video id of the Short.
    id: str

# Type stubs only: the `...` bodies are placeholders and the signatures are
# what type checkers read.

# Boolean predicate over a URL target.
def is_youtube_url(url: AnyUrlTarget) -> bool: ...
# Boolean predicate over a candidate video id string.
def is_youtube_video_id(value: str) -> bool: ...
# Boolean predicate over a candidate channel id string.
def is_youtube_channel_id(value: str) -> bool: ...
# Parse a YouTube URL into one of the result tuples, or None when nothing
# could be extracted. `YoutubeShort` belongs in the union because the
# implementation returns it for "/shorts/<id>" paths; the earlier annotation
# without it has been dropped so only one return type remains.
def parse_youtube_url(
    url: AnyUrlTarget, fix_common_mistakes: bool = ...
) -> Optional[Union[YoutubeVideo, YoutubeUser, YoutubeChannel, YoutubeShort]]: ...
# Yields the video id as a string, or None (per the Optional return type).
def extract_video_id_from_youtube_url(url: AnyUrlTarget) -> Optional[str]: ...
# Typed as always returning a string.
# NOTE(review): the implementation ends with a `raise TypeError(...)`
# fallthrough instead of returning None -- confirm against ural/youtube.py.
def normalize_youtube_url(url: AnyUrlTarget) -> str: ...

0 comments on commit eb152c4

Please sign in to comment.