-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathURLRoller.py
51 lines (42 loc) · 1.61 KB
/
URLRoller.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# !/usr/bin/env python
# -*- coding: utf-8 -*-
''' Round-robin iterator over the URL list files named in the collector configuration (Python 3).
'''
import itertools
import re
import codecs
from CfgReader import CfgFileParser
from FileLogger import FileLogger
CONFIGFILE = 'WebPageCollector.cfg'
# ============================================================================================================
class URLRoller():
    """Endless round-robin over the URLs listed in the project's list files.

    On construction the collector configuration (``CONFIGFILE``) is read:
    ``Config['Collector']['project']`` names the active project section,
    ``Config[project]['loader']`` is the number of URL list files, and the
    keys ``'1'`` .. ``'<loader>'`` of that section hold their paths.  Each
    file is read as UTF-8 with one URL per line.

    Attributes:
        file_logger: ``FileLogger`` instance; every loaded URL is passed to
            its ``csv_log`` method.
        Config: object returned by ``CfgFileParser.file_reader``.
        Project: name of the active project section.
        step: number of URL list files to load.
        URLList: all loaded URLs, in file order then line order.
        StartURL: first entry of ``URLList``.
        URLRoller: ``itertools.cycle`` over ``URLList``.
    """

    # Matches any run of whitespace; compiled once instead of per line.
    _WHITESPACE_RUN = re.compile(r'\s+')

    def __init__(self):
        """Load every configured URL list and prepare the endless cycle.

        Raises:
            IndexError: if the configured files contain no URL at all.
        """
        self.file_logger = FileLogger()
        cfg_parser = CfgFileParser()
        self.Config = cfg_parser.file_reader(CONFIGFILE)
        self.Project = self.Config['Collector']['project']
        self.step = int(self.Config[self.Project]['loader'])
        enum_list = []
        for Idx in range(self.step):
            print('in the loop')
            # List files are stored under the 1-based keys '1', '2', ...
            col_data = self.Config[self.Project][str(Idx + 1)]
            # The counter restarts at 1 for every file, as before.
            for Cpt, URL in enumerate(self._read_urls(col_data), start=1):
                enum_list.append(URL)
                self.file_logger.csv_log('URL %s:' % Cpt, URL)
        if not enum_list:
            # Same exception type the bare ``enum_list[0]`` used to raise,
            # but with a message that says what is actually wrong.
            raise IndexError(
                'no URL found in the %d list file(s) of project %r'
                % (self.step, self.Project)
            )
        self.URLList = enum_list
        self.StartURL = enum_list[0]
        self.URLRoller = itertools.cycle(enum_list)

    @classmethod
    def _clean_url(cls, line):
        """Return *line* stripped, with inner whitespace runs collapsed."""
        # ``strip()`` already removes the line terminator; slicing off the
        # last character first would truncate a final line without newline.
        return cls._WHITESPACE_RUN.sub(' ', line.strip())

    @classmethod
    def _read_urls(cls, path):
        """Return the cleaned, non-empty URLs of the UTF-8 file at *path*."""
        # The context manager closes the file even if decoding fails.
        with open(path, 'r', encoding='utf-8') as file_in:
            cleaned = [cls._clean_url(line) for line in file_in]
        # Blank lines carry no URL and must not end up in the cycle.
        return [url for url in cleaned if url]

    def next(self):
        """Return the next URL, wrapping around to the first after the last."""
        return next(self.URLRoller)

    def starter(self):
        """Return the first URL that was loaded."""
        return self.StartURL

    def __iter__(self):
        """Return self; note that the iteration never terminates."""
        return self

    def __next__(self):
        """Iterator-protocol alias of :meth:`next`."""
        return self.next()
# ============================================================================================================
if __name__ == '__main__':
    # Manual smoke test: print the first ten URLs handed out by the roller.
    # The instance gets its own name so the URLRoller class is not shadowed.
    roller = URLRoller()
    for i in range(10):
        print(i, roller.next())