From 2e9df31f60f6635a876cbe35b03dfc3655252f77 Mon Sep 17 00:00:00 2001
From: Adrien Barbaresi <adbar@users.noreply.github.com>
Date: Mon, 20 Nov 2023 17:29:31 +0100
Subject: [PATCH] add function is_valid_url() (#63)

---
 courlan/__init__.py | 8 +++++++-
 courlan/filters.py  | 5 +++++
 tests/unit_tests.py | 4 ++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/courlan/__init__.py b/courlan/__init__.py
index 62c51f2..290761e 100644
--- a/courlan/__init__.py
+++ b/courlan/__init__.py
@@ -14,7 +14,13 @@
 # imports
 from .clean import clean_url, normalize_url, scrub_url
 from .core import check_url, extract_links
-from .filters import is_navigation_page, is_not_crawlable, lang_filter, validate_url
+from .filters import (
+    is_navigation_page,
+    is_not_crawlable,
+    is_valid_url,
+    lang_filter,
+    validate_url,
+)
 from .sampling import sample_urls
 from .urlstore import UrlStore
 from .urlutils import (
diff --git a/courlan/filters.py b/courlan/filters.py
index 28847e0..6006f24 100644
--- a/courlan/filters.py
+++ b/courlan/filters.py
@@ -239,6 +239,11 @@ def validate_url(url: Optional[str]) -> Tuple[bool, Any]:
     return True, parsed_url
 
 
+def is_valid_url(url: Optional[str]) -> bool:
+    """Tell whether validate_url() accepts the given string as a valid URL."""
+    return validate_url(url)[0]
+
+
 def is_navigation_page(url: str) -> bool:
     """Determine if the URL is related to navigation and overview pages
     rather than content pages, e.g. /page/1 vs. article page."""
diff --git a/tests/unit_tests.py b/tests/unit_tests.py
index 7a8cdd1..2140d38 100644
--- a/tests/unit_tests.py
+++ b/tests/unit_tests.py
@@ -27,6 +27,7 @@
     is_external,
     sample_urls,
     validate_url,
+    is_valid_url,
     extract_links,
     extract_domain,
     filter_urls,
@@ -463,6 +464,9 @@ def test_validate():
     assert validate_url("http://test.org/test")[0] is True
     # assert validate_url("http://sub.-mkyong.com/test")[0] is False
 
+    assert not is_valid_url("http://www.test[.org/test")
+    assert is_valid_url("http://test.org/test")
+
 
 def test_normalization():
     assert normalize_url("HTTPS://WWW.DWDS.DE/") == "https://www.dwds.de/"