Skip to content

hashlib

holzkohlengrill edited this page Dec 15, 2023 · 2 revisions
# We use lxml to get some contents to hash

import hashlib
import time
import requests
import datetime
import lxml
from lxml import html

# Show which hash algorithms this interpreter can use.
# `algorithms_available` also includes algorithms provided by OpenSSL, while
# `algorithms_guaranteed` lists only those provided by the hashlib module itself.
print("available algos:\n\t", hashlib.algorithms_available)     # including those provided by openssl
print()
print("guaranteed algos:\n\t", hashlib.algorithms_guaranteed)    # only algorithms provided by this module

# Poll a news-ticker page and report whenever the SHA-512 digest of the text
# inside the watched element changes.
TICKER_URL = r"https://www.merkur.de/nachrichtenticker/"
# Region of interest on the page (presumably a site-generated id that may
# change over time -- verify against the live page).
ROI_XPATH = """//*[@id="id-js-LoadMore--42858427--1"]/div[1]"""
REQUEST_TIMEOUT_S = 10      # without a timeout requests.get() may block indefinitely
POLL_INTERVAL_S = 2         # pause between two polls

# Seed the comparison with the hash of empty content so that the first fetch
# with non-empty content is reported as a change.
sha512hashPrev = hashlib.sha512("".encode())              # set init value

i = 0

while True:
    # Progress counter, rewritten in place on the same terminal line.
    print("\r", i, end=",", flush=True)
    i += 1
    try:
        website = requests.get(TICKER_URL, timeout=REQUEST_TIMEOUT_S)
        website.raise_for_status()      # do not hash HTTP error pages
    except requests.exceptions.RequestException as err:
        # Covers connection refused (too many requests from the same IP
        # address; "Max retries exceeded with url:"), timeouts and HTTP
        # error statuses. Keep polling instead of crashing.
        print("\n(warn)\t request failed @ {}\n{}".format(datetime.datetime.now().time(), err))
    else:
        lxmlWebsiteObj = lxml.html.fromstring(website.content)
        roiMatches = lxmlWebsiteObj.xpath(ROI_XPATH)
        if not roiMatches:
            # The watched element is missing (e.g. the page layout changed);
            # indexing [0] here would raise IndexError and stop the monitor.
            print("\n(warn)\t watched element not found @ {}".format(datetime.datetime.now().time()))
        else:
            roiWebsite = roiMatches[0]
            websiteContent = roiWebsite.text_content()       # content with all children
            websiteContentBin = websiteContent.encode()      # hashing needs bytes

            # Alternative: hash the whole page instead of one element via
            # `website.text.encode()` (`.text` is unicode, `.content` is bytes).

            sha512hash = hashlib.sha512(websiteContentBin)
            if sha512hash.digest() != sha512hashPrev.digest():
                print("\n(info)\t content changed @ {}\n{}\n{}".format(datetime.datetime.now().time(), sha512hash.hexdigest(), sha512hashPrev.hexdigest()))
                sha512hashPrev = sha512hash.copy()
    time.sleep(POLL_INTERVAL_S)
Clone this wiki locally