From 4f68f9b53be9999d6cc65fb8983870cdbbfc3a96 Mon Sep 17 00:00:00 2001 From: konono Date: Tue, 8 Feb 2022 03:16:53 +0900 Subject: [PATCH] [Mod] URLExtract requires a cache file to be created, which cannot be used in lambda, so change the logic to regular expressions. Please refer to https://github.com/lipoja/URLExtract/issues/61 --- holoscope/datamodel.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/holoscope/datamodel.py b/holoscope/datamodel.py index f88efa5..0da26bb 100644 --- a/holoscope/datamodel.py +++ b/holoscope/datamodel.py @@ -2,11 +2,11 @@ # -*- coding: utf-8 -*- import arrow +import re from dataclasses import dataclass from typing import List from typing import Optional -from urlextract import URLExtract from urllib.parse import urlparse @@ -67,7 +67,8 @@ def description(self) -> str: @property def video_id(self) -> str: - for url in URLExtract().find_urls(self.description): + url_pattern = "https?://[\w/:%#\$&\?\(\)~\.=\+\-]+" # noqa: W605 + for url in re.findall(url_pattern, self.description): url = urlparse(url) if 'youtube.com' in url.netloc or 'youtu.be' in url.netloc: return url.query.split('=')[1]