Skip to content

Commit

Permalink
ImproveLogs
Browse files Browse the repository at this point in the history
  • Loading branch information
MikeMeliz committed Oct 31, 2024
1 parent b689557 commit 6a1a9da
Showing 1 changed file with 21 additions and 13 deletions.
34 changes: 21 additions & 13 deletions modules/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,20 @@ def canonical(self, link):
final_link = self.website + "/" + link
return final_link

def write_log(self, log):
    """ Append a timestamped entry to the crawler's log file.

    Writes ``<timestamp> [crawler.py] <log>`` to ``<out_path>/crawler.log``.
    Does nothing when logging is disabled (``self.logs`` is not True).
    Exits the process with status 2 if the log file cannot be written.

    :param log: String - Message to record (caller supplies any trailing
                newline).
    :return: None
    """
    # Logging disabled: nothing to do (the original guarded file creation
    # on self.logs but then wrote unconditionally — a bug).
    if self.logs is not True:
        return

    log_path = self.out_path + '/crawler.log'
    now = datetime.datetime.now()

    # EAFP: opening in 'a+' creates the file if missing without
    # truncating it; a failed open/write is the authoritative signal
    # that the location is unwritable (no racy os.access pre-check,
    # no leaked file handle from a bare open()).
    try:
        with open(log_path, 'a+', encoding='UTF-8') as log_file:
            log_file.write(str(now) + " [crawler.py] " + log)
    except OSError:
        # Report the actual file we failed to write (was wrongly
        # reported as log.txt).
        print(f"## Unable to write to {log_path} - Exiting")
        sys.exit(2)


def crawl(self):
""" Core of the crawler.
:return: List (ord_lst) - List of crawled links.
Expand All @@ -91,11 +105,6 @@ def crawl(self):
ord_lst = []
ord_lst.insert(0, self.website)
ord_lst_ind = 0
log_path = self.out_path + '/log.txt'

if self.logs is True and os.access(log_path, os.W_OK) is False:
print(f"## Unable to write to {self.out_path}/log.txt - Exiting")
sys.exit(2)

print(f"## Crawler started from {self.website} with "
f"{str(self.c_depth)} depth crawl, and {str(self.c_pause)} "
Expand Down Expand Up @@ -180,22 +189,21 @@ def crawl(self):
ord_lst = ord_lst + list(set(lst))
ord_lst = list(set(ord_lst))

# Keeps logs for every webpage visited.
it_code = html_page.getcode()
url_visited = f"[{str(it_code)}] {str(item)} \n"
self.write_log("[INFO] Logged: " + url_visited)

if self.verbose:
sys.stdout.write("-- Results: " + str(len(ord_lst)) + "\r")
sys.stdout.write("\033[K -- Results: " + str(len(ord_lst)) + " | Scanned: [" + str(it_code) + "] " + str(item) + "\r")
sys.stdout.flush()

# Pause time.
if (ord_lst.index(item) != len(ord_lst) - 1) and \
float(self.c_pause) > 0:
time.sleep(float(self.c_pause))

# Keeps logs for every webpage visited.
if self.logs:
it_code = html_page.getcode()
with open(log_path, 'w+', encoding='UTF-8') as log_file:
log_file.write(f"[{str(it_code)}] {str(item)} \n")

print(f"## Step {str(index + 1)} completed "
print(f"\033[K## Step {str(index + 1)} completed "
f"with: {str(len(ord_lst))} result(s)")

return ord_lst

0 comments on commit 6a1a9da

Please sign in to comment.