-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
73 lines (63 loc) · 2.17 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import pandas as pd
import json
import os
def write_page(stock_id: str, logstr: str) -> None:
    """Overwrite the file ``log/<stock_id>`` with ``logstr``.

    The ``log`` directory (relative to the current working directory) is
    created if it does not exist. The file is opened in ``'w'`` mode, so any
    previous content is discarded.

    Args:
        stock_id: File name inside the ``log`` directory.
        logstr: Text to store; written as-is, with no newline appended.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('log', exist_ok=True)
    with open(os.path.join('log', stock_id), 'w', encoding='utf-8') as f:
        f.write(logstr)
def read_page(stock_id: str) -> int:
    """Return the integer stored on the first line of ``log/<stock_id>``.

    The ``log`` directory is created if it does not exist (the original
    behaviour, kept so later writers find the directory in place).

    Args:
        stock_id: File name inside the ``log`` directory.

    Returns:
        The first line parsed as an ``int``; ``0`` when the file does not
        exist, is empty, or its first line is blank.

    Raises:
        ValueError: If the first line is non-blank but not a valid integer.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('log', exist_ok=True)
    page_file = os.path.join('log', stock_id)
    if not os.path.isfile(page_file):
        return 0
    with open(page_file, 'r', encoding='utf-8') as f:
        # Only the first line matters; no need to load the whole file.
        first_line = f.readline().strip()
    # An empty file used to raise IndexError; treat it like a missing file.
    return int(first_line) if first_line else 0
def write_log(stock_id: str, logstr: str) -> None:
    """Append ``logstr`` as one line to ``log/<stock_id>``.

    The ``log`` directory is created if it does not exist, so the first
    write no longer fails with ``FileNotFoundError``.

    Args:
        stock_id: File name inside the ``log`` directory.
        logstr: Text of the line; a trailing newline is added.
    """
    os.makedirs('log', exist_ok=True)
    # The context manager closes the handle even if the write raises.
    with open(os.path.join('log', stock_id), 'a', encoding='utf-8') as f:
        f.write(logstr + '\n')
def read_log(stock_id: str) -> dict:
    """Load the JSON-lines file ``log/<stock_id>`` keyed by ``comment_url``.

    Each non-blank line is parsed as a JSON object and stored under its
    ``'comment_url'`` value; when a URL occurs more than once, the last
    record wins. The ``log`` directory is created if it does not exist.

    Args:
        stock_id: File name inside the ``log`` directory.

    Returns:
        A dict mapping ``comment_url`` to the parsed record, or an empty
        dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``'comment_url'`` key.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('log', exist_ok=True)
    log_file = os.path.join('log', stock_id)
    if not os.path.isfile(log_file):
        return {}
    comment_urls = {}
    with open(log_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a stray trailing newline) used to crash
                # json.loads; they carry no record, so skip them.
                continue
            record = json.loads(line)
            comment_urls[record['comment_url']] = record
    return comment_urls
def write_url(stock_id: str, logstr: str) -> None:
    """Append ``logstr`` (a URL) as one line to ``log/<stock_id>``.

    The ``log`` directory is created if it does not exist, so the first
    write no longer fails with ``FileNotFoundError``.

    Args:
        stock_id: File name inside the ``log`` directory.
        logstr: Text of the line; a trailing newline is added.
    """
    os.makedirs('log', exist_ok=True)
    # The context manager closes the handle even if the write raises.
    with open(os.path.join('log', stock_id), 'a', encoding='utf-8') as f:
        f.write(logstr + '\n')
def read_url(stock_id: str) -> set:
    """Return the distinct lines of ``log/<stock_id>`` as a set of strings.

    Each line has its trailing newline removed; no other normalisation is
    applied. The ``log`` directory is created if it does not exist.

    Args:
        stock_id: File name inside the ``log`` directory.

    Returns:
        The set of lines in the file, or an empty set when the file does
        not exist.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('log', exist_ok=True)
    url_file = os.path.join('log', stock_id)
    if not os.path.isfile(url_file):
        return set()
    with open(url_file, 'r', encoding='utf-8') as f:
        # Build the set directly instead of indexing into an intermediate list.
        return {line.rstrip('\n') for line in f}
def json2csv(path, save_path):
    """Merge every JSON file in ``path`` into one CSV written to ``save_path``.

    Each file is loaded as one JSON object and becomes one row. The result
    is indexed by the ``'time'`` column, and the ``'read'``,
    ``'subcomments'``, ``'comment_url'`` and ``'comment_id'`` columns are
    dropped before writing.

    Args:
        path: Directory containing the JSON files (one object per file).
        save_path: Destination path of the CSV file.

    Raises:
        KeyError: If ``'time'`` or any of the dropped columns is missing,
            including the case where ``path`` contains no files.
    """
    frames = []
    # Sorted so the row order is deterministic; os.listdir order is arbitrary.
    for name in sorted(os.listdir(path)):
        with open(os.path.join(path, name), 'r', encoding='utf-8') as f:
            item = json.load(f)
        frames.append(pd.DataFrame(item, index=[0]))
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call; concatenate once instead.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df = df.set_index('time')
    df = df.drop(columns=['read', 'subcomments', 'comment_url', 'comment_id'])
    df.to_csv(save_path)
# Example: json2csv('comment\\000983', '00983.csv')  # writes the CSV; returns None