forked from Nandaka/PixivUtil2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PixivBookmark.py
138 lines (112 loc) · 4.33 KB
/
PixivBookmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# -*- coding: utf-8 -*-
# pylint: disable=I0011, C, C0302
import codecs
import collections
import json
import re
from datetime import datetime
from bs4 import BeautifulSoup
import PixivException
class PixivBookmark(object):
    '''Class for parsing Bookmarks'''

    @staticmethod
    def parseBookmark(page, root_directory, db_path, locale='', is_json=False):
        '''Parse a favorite artist page into member records.

        Args:
            page: Raw page content. A JSON document when ``is_json`` is true,
                otherwise HTML (old method).
            root_directory: Forwarded to ``PixivDBManager`` as
                ``root_directory``.
            db_path: Forwarded to ``PixivDBManager`` as ``target``.
            locale: Prefix expected in front of ``/users/<id>`` in the links
                of the HTML page (old method only).
            is_json: Parse ``page`` as JSON instead of HTML.

        Returns:
            A list holding the result of ``db.selectMemberByMemberId2`` for
            each member id found, in page order.
        '''
        # Kept as a function-level import, as in the original code.
        from PixivDBManager import PixivDBManager

        db = PixivDBManager(root_directory=root_directory, target=db_path)

        if is_json:
            member_ids = [
                member["userId"]
                for member in json.loads(page)["body"]["users"]
                if not member.get("isAdContainer")  # skip advertisement slots
            ]
        else:
            # old method
            member_ids = PixivBookmark._parse_member_ids_from_html(page, locale)

        return [db.selectMemberByMemberId2(member_id) for member_id in member_ids]

    @staticmethod
    def _parse_member_ids_from_html(page, locale):
        '''Return the distinct member ids linked from the 'members' element.

        Ids are returned as strings in first-seen order. An empty list is
        returned when the page has no element with class ``members``.
        '''
        parse_page = BeautifulSoup(page, features="html5lib")
        try:
            member_list = parse_page.find(attrs={'class': 'members'})
            if member_list is None:
                # find() returns None when nothing matches; treat that as
                # "no members" instead of failing with an AttributeError.
                return []

            re_member = re.compile(locale + r'/users/(\d*)')

            # filter duplicated member_id while keeping page order
            unique_ids = collections.OrderedDict()
            for anchor in member_list.findAll('a'):
                # Anchors without an href cannot reference a member.
                found = re_member.findall(anchor.get('href') or '')
                # (\d*) can also capture an empty string; ignore those.
                if found and found[0]:
                    unique_ids[found[0]] = found[0]
            return list(unique_ids)
        finally:
            # Release the parsed tree even when parsing fails.
            parse_page.decompose()
            del parse_page

    @staticmethod
    def parseImageBookmark(page, image_tags_filter=None):
        '''Parse an image bookmark JSON page.

        Args:
            page: JSON text with ``body.total`` and ``body.works``.
            image_tags_filter: When not ``None``, only works having a tag
                exactly equal to this value are returned.

        Returns:
            A tuple ``(imageList, total_images)`` where ``imageList`` is the
            list of image ids as ``int`` and ``total_images`` is
            ``body.total`` as found in the page.
        '''
        body = json.loads(page)["body"]
        # total bookmarks, won't be the same if image_tags_filter used.
        total_images = body["total"]

        image_list = []
        for work in body["works"]:
            if work.get("isAdContainer"):
                continue

            # Issue #928: exact tag only. The filter must only apply when one
            # was supplied; without it every non-ad work is kept.
            if image_tags_filter is not None:
                if not any(tag == image_tags_filter for tag in work.get("tags", ())):
                    continue

            # Issue #822
            if "illustId" in work:
                image_list.append(int(work["illustId"]))
            elif "id" in work:
                image_list.append(int(work["id"]))

        return (image_list, total_images)

    @staticmethod
    def _write_export(filename, label, lines):
        '''Write an export file: header line, one entry per line, end marker.

        ``.txt`` is appended to ``filename`` when it is missing. The file is
        closed even if producing or writing an entry raises.
        '''
        if not filename.endswith('.txt'):
            filename = filename + '.txt'
        with codecs.open(filename, 'w', encoding='utf-8') as writer:
            writer.write(f'###Export {label} date: {datetime.today()} ###\n')
            for line in lines:
                writer.write(line)
                writer.write('\r\n')
            writer.write('###END-OF-FILE###')

    @staticmethod
    def exportList(lst, filename):
        '''Export members to a text file.

        Each item of ``lst`` must provide ``memberId`` and ``path``; the path
        is appended after a space when it is not empty.
        '''
        def format_member(item):
            data = str(item.memberId)
            if item.path:  # also tolerates a path of None
                data = data + ' ' + item.path
            return data

        PixivBookmark._write_export(
            filename, 'members', (format_member(item) for item in lst))

    @staticmethod
    def export_image_list(lst, filename):
        '''Export image ids to a text file, one ``str(item)`` per line.'''
        PixivBookmark._write_export(
            filename, 'images', (str(item) for item in lst))
class PixivNewIllustBookmark(object):
    '''Class for parsing New Illust from Bookmarks'''
    # Image ids parsed from the page; a list of int once constructed.
    imageList = None
    # Not assigned anywhere in this class (the last-page check is disabled).
    isLastPage = None
    # True when at least one image id was parsed.
    haveImages = None

    def __init__(self, page):
        '''Parse ``page`` (JSON text) and populate ``imageList``/``haveImages``.'''
        self.__ParseNewIllustBookmark(page)
        # self.__CheckLastPage(page)
        self.haveImages = bool(self.imageList)

    def __ParseNewIllustBookmark(self, page):
        '''Fill ``self.imageList`` with the ids found in ``body.page.ids``.

        Args:
            page: JSON text of the new-illust-from-bookmarks response.

        Returns:
            ``self.imageList``, the parsed ids converted to ``int``.

        Raises:
            The project's PixivException (with ``errorCode`` set to its
            ``OTHER_ERROR``) when the response has a truthy ``error`` field.
        '''
        self.imageList = list()
        page_json = json.loads(page)

        if page_json.get("error"):
            # The file does `import PixivException`, which binds the *module*;
            # calling it directly fails with "'module' object is not
            # callable". Resolve the class from the module, falling back to
            # the name itself in case it is already bound to the class.
            # NOTE(review): assumes the module defines a class of the same
            # name carrying OTHER_ERROR - confirm against PixivException.py.
            exc_type = getattr(PixivException, "PixivException", PixivException)
            raise exc_type(page_json.get("message"), errorCode=exc_type.OTHER_ERROR)

        # 1028
        self.imageList = [int(image_id) for image_id in page_json["body"]["page"]["ids"]]
        return self.imageList