Skip to content

Commit

Permalink
[Mod] URLExtract requires a cache file to be created, which cannot be…
Browse files Browse the repository at this point in the history
… used in lambda, so change the logic to use regular expressions

Please refer to lipoja/URLExtract#61
  • Loading branch information
konono committed Feb 8, 2022
1 parent 4b8502e commit 4f68f9b
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions holoscope/datamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
# -*- coding: utf-8 -*-

import arrow
import re

from dataclasses import dataclass
from typing import List
from typing import Optional
from urlextract import URLExtract
from urllib.parse import urlparse


Expand Down Expand Up @@ -67,7 +67,8 @@ def description(self) -> str:

@property
def video_id(self) -> str:
for url in URLExtract().find_urls(self.description):
url_pattern = "https?://[\w/:%#\$&\?\(\)~\.=\+\-]+" # noqa: W605
for url in re.findall(url_pattern, self.description):
url = urlparse(url)
if 'youtube.com' in url.netloc or 'youtu.be' in url.netloc:
return url.query.split('=')[1]
Expand Down

0 comments on commit 4f68f9b

Please sign in to comment.