Skip to content

Commit

Permalink
add titles check into urls.py
Browse files Browse the repository at this point in the history
  • Loading branch information
paskal committed Dec 10, 2023
1 parent f832a3f commit 5ed46f1
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion scripts/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from enum import Enum
from typing import Optional
from urllib.parse import urljoin
from lxml.html import fromstring

import requests

Expand All @@ -14,6 +15,7 @@ class RunTypes(Enum):
redirects = 'redirects'
chain_redirects = 'chain_redirects'
bad_status_codes = 'bad_status_codes'
titles = 'titles'

def __str__(self):
return self.value
Expand Down Expand Up @@ -54,7 +56,7 @@ def retrieve_url(url: str) -> Optional[requests.Response]:

def check_redirect(self, resp: requests.Response, url: str):
"""Prints URL and status code of the provided response if it has non-200 status code,
or URL and it's redirect final destination or the status code in case it's not 301 or 302.
or URL, and its redirect final destination or the status code in case it's not 301 or 302.
"""
if url != resp.url:
self.update_redirect(url, resp.url)
Expand Down Expand Up @@ -92,6 +94,10 @@ def main(run_type: str, site: str, urls_file: str, update_redirects: bool):
resp = url_checker.retrieve_url(absolute_url)
if resp is None:
continue
if run_type == "titles":
title = fromstring(resp.content).findtext('.//title')
url = resp.url.removeprefix("https://favor-group.ru")
print(f"{url};{title}")
if run_type == "redirects":
url_checker.check_redirect(resp, absolute_url)
if run_type == "chain_redirects":
Expand Down

0 comments on commit 5ed46f1

Please sign in to comment.