-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdriver.py
74 lines (66 loc) · 2.74 KB
/
driver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import requests
from bs4 import BeautifulSoup
from CsvWriter import CsvWriter
from JsonWriter import JsonWriter
class Driver:
    """Download a paginated JSON collection and write each page to a file.

    Each page is requested with ``page`` / ``per_page`` query parameters,
    flattened to plain text by :meth:`prepareData`, and handed to
    ``CsvWriter`` or ``JsonWriter``. The total number of pages is read from
    the ``X-Wp-Totalpages`` header of the first successful response
    (presumably a WordPress REST API endpoint -- confirm against the caller).
    """

    # Class-level default for the attribute that holds the writer class most
    # recently used by writeData(). Kept under its original name for
    # backward compatibility.
    Driver = None

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Initialise paging state; performs no I/O."""
        self.Driver = None
        self.data = []  # last fetched page (raw) or last prepared page
        self.filename = "fluent_answers.csv"
        self.file = None
        self.writer = None
        self.queryPage = 1  # next page to request
        self.perPage = 100  # items requested per page
        self.totalPages = 0  # unknown until the first successful request
        self.is_running_first_time = True

    @staticmethod
    def _plainText(html):
        """Return the text content of an HTML fragment with newlines removed."""
        return BeautifulSoup(html, features="html.parser").get_text().replace("\n", "")

    @staticmethod
    def _writerFor(kind):
        """Return the writer class for an output type ("csv" or "json").

        Matching is case-insensitive. Raises ValueError for anything else.
        """
        normalized = kind.lower()
        if normalized == "csv":
            return CsvWriter
        if normalized == "json":
            return JsonWriter
        raise ValueError(f"Unsupported output type: {kind!r}")

    # Prepare data for writing
    def prepareData(self, data, type):
        """Convert raw API items into the structure expected by the writers.

        Args:
            data: iterable of items, each providing ``title``, ``content`` and
                ``excerpt`` mappings with a ``"rendered"`` HTML string, plus a
                ``link`` value.
            type: ``"json"`` produces a dict keyed by item index; any other
                value produces a list of rows.

        Returns:
            For ``"json"``: ``{index: {"title", "content", "link", "excerpt"}}``.
            Otherwise: ``[[title, link, content, excerpt], ...]``.
        """
        as_json = type == "json"
        prepared = {} if as_json else []
        for index, item in enumerate(data):
            title = self._plainText(item["title"]["rendered"])
            content = self._plainText(item["content"]["rendered"])
            excerpt = self._plainText(item["excerpt"]["rendered"])
            link = item["link"]
            if as_json:
                prepared[index] = {
                    "title": title,
                    "content": content,
                    "link": link,
                    "excerpt": excerpt,
                }
            else:
                # The second column carries the link rather than a repeated
                # title, so the CSV output holds the same four fields as the
                # JSON output.
                # NOTE(review): confirm column order against CsvWriter's header.
                prepared.append([title, link, content, excerpt])
        return prepared

    # Request data from the API
    def requestData(self, url):
        """Fetch page ``self.queryPage`` from ``url`` into ``self.data``.

        On the first successful call the page count is read from the
        ``X-Wp-Totalpages`` response header and stored in ``self.totalPages``.
        The page counter itself is advanced by :meth:`writeData`, not here.

        Returns:
            True on success. On a network error, a non-2xx status, a non-JSON
            body or a missing/invalid page-count header, prints an error,
            leaves the instance state unchanged and returns False.
        """
        query = {"page": self.queryPage, "per_page": self.perPage}
        total_pages = None
        try:
            response = requests.get(url, params=query, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            payload = response.json()
            if self.is_running_first_time:
                total_pages = int(response.headers["X-Wp-Totalpages"])
        except (requests.RequestException, ValueError, KeyError):
            print("Error: Something went wrong while requesting data")
            return False

        # Commit only after everything parsed, so a failure never leaves a
        # half-updated state behind.
        self.data = payload
        if total_pages is not None:
            self.totalPages = total_pages
            self.is_running_first_time = False
        return True

    # Write data to file
    def writeData(self, type="csv"):
        """Prepare ``self.data``, write it to ``self.filename``, advance the page.

        Args:
            type: ``"csv"`` or ``"json"`` (case-insensitive for writer
                selection).

        Raises:
            ValueError: if ``type`` names no known writer; raised before any
                state is modified.
        """
        writer_cls = self._writerFor(type)
        self.data = self.prepareData(self.data, type)
        self.Driver = writer_cls
        writer_cls(self.filename).write(self.data)
        self.queryPage += 1

    def fileName(self, type):
        """Return the output file name for the current page and extension ``type``."""
        return f"fluent_answers_{self.queryPage}.{type}"

    def startWriting(self, url, type="csv"):
        """Fetch every remaining page from ``url`` and write one file per page.

        Stops early if a request fails, so stale data is never written.
        """
        # The first request both discovers the page count and returns the
        # current page, so its payload is reused instead of being re-fetched.
        prefetched = False
        if self.totalPages == 0:
            prefetched = self.requestData(url) is not False

        while int(self.queryPage) <= int(self.totalPages):
            print(f"Getting data from page {self.queryPage}")
            self.filename = self.fileName(type)
            if prefetched:
                prefetched = False
            elif self.requestData(url) is False:
                break
            self.writeData(type)
        print("Done!")