Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: iqqtv (#246), airav_cc (#251) #264

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions src/models/core/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,15 @@ def google_translate(title, outline):


def _google_translate(msg: str) -> (str, str):
msg_unquote = urllib.parse.unquote(msg)
url = f'https://translate.google.com/translate_a/single?client=gtx&sl=auto&tl=zh-CN&dt=t&q={msg_unquote}'
result, response = get_html(url, json_data=True)
if not result:
return msg, f'请求失败!可能是被封了,可尝试更换代理!错误:{response}'
return "".join([sen[0] for sen in response[0]]), ""
try:
msg_unquote = urllib.parse.unquote(msg)
url = f'https://translate.google.com/translate_a/single?client=gtx&sl=auto&tl=zh-CN&dt=t&q={msg_unquote}'
result, response = get_html(url, json_data=True)
if not result:
return msg, f'请求失败!可能是被封了,可尝试更换代理!错误:{response}'
return "".join([sen[0] for sen in response[0]]), ""
except Exception as e:
return msg, str(e)


def download_file_with_filepath(json_data, url, file_path, folder_new_path):
Expand Down
5 changes: 3 additions & 2 deletions src/models/crawlers/airav_cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def get_real_url(html, number):
detail_url = each.xpath('.//a/@href')[0]
title = each.xpath('.//h5/text()')[0]
# 注意去除马赛克破坏版这种几乎没有有效字段的条目
if number.upper() in title and '克破' not in title:
if number.upper() in title and all(keyword not in title for keyword in ['克破', '无码破解', '無碼破解']):
return detail_url
return ''

Expand Down Expand Up @@ -336,4 +336,5 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
# print(main('x-art.19.11.03', ''))
# print(main('ssis-200', '')) # 多个搜索结果
# print(main('JUY-331', '')) # 存在系列字段
print(main('SONE-248', '')) # 简介存在无效信息 "*根据分发方式,内容可能会有所不同"
# print(main('SONE-248', '')) # 简介存在无效信息 "*根据分发方式,内容可能会有所不同"
print(main('CAWD-688', ''))  # search results include a title tagged "无码破解" (uncensored crack)
174 changes: 88 additions & 86 deletions src/models/crawlers/iqqtv.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,8 @@ def get_real_url(html, number):
detail_url = each.xpath('./a/@href')[0]
title = each.xpath('./a/@title')[0]
# 注意去除马赛克破坏版等几乎没有有效字段的条目
for i in ['克破', '无码流出', '無碼流出']:
if number.upper() in title and i not in title:
return detail_url
if number.upper() in title and all(keyword not in title for keyword in ['克破', '无码破解', '無碼破解', '无码流出','無碼流出']):
return detail_url
return ''


Expand All @@ -164,7 +163,10 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):

if not re.match(r'n\d{4}', number):
number = number.upper()
real_url = appoint_url
if appoint_url:
real_url = appoint_url
else:
real_url = ''
iqqtv_url = getattr(config, "iqqtv_website", "https://iqq5.xyz")
cover_url = ''
image_cut = 'right'
Expand Down Expand Up @@ -199,93 +201,90 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
html = etree.fromstring(html_search, etree.HTMLParser())
real_url = html.xpath('//a[@class="ga_click"]/@href')
if real_url:
real_url = iqqtv_url + real_url[0].replace('/cn/', '').replace('/jp/', '').replace('&cat=19', '')
real_url_tmp = get_real_url(html, number)
real_url = iqqtv_url + real_url_tmp.replace('/cn/', '').replace('/jp/', '').replace('&cat=19', '')
else:
debug_info = '搜索结果: 未匹配到番号!'
log_info += web_info + debug_info
raise Exception(debug_info)
if real_url:
# 只有一个搜索结果时直接取值 多个则进入判断
if len(real_url) == 1:
real_url = iqqtv_url + real_url[0].replace('/cn/', '').replace('/jp/', '').replace('&cat=19', '')
else:
real_url_tmp = get_real_url(html, number)
real_url = iqqtv_url + real_url_tmp.replace('/cn/', '').replace('/jp/', '').replace('&cat=19', '')
debug_info = '番号地址: %s ' % real_url
else:
real_url = iqqtv_url + re.sub(r'.*player', 'player', appoint_url)

debug_info = '番号地址: %s ' % real_url
log_info += web_info + debug_info
result, html_content = get_html(real_url)
if not result:
debug_info = '网络请求错误: %s' % html_content
log_info += web_info + debug_info
result, html_content = get_html(real_url)
if not result:
debug_info = '网络请求错误: %s' % html_content
log_info += web_info + debug_info
raise Exception(debug_info)
html_info = etree.fromstring(html_content, etree.HTMLParser())
raise Exception(debug_info)
html_info = etree.fromstring(html_content, etree.HTMLParser())

title = get_title(html_info) # 获取标题
if not title:
debug_info = '数据获取失败: 未获取到title!'
log_info += web_info + debug_info
raise Exception(debug_info)
web_number = getWebNumber(title, number) # 获取番号,用来替换标题里的番号
title = title.replace(' %s' % web_number, '').strip()
actor = getActor(html_info) # 获取actor
actor_photo = getActorPhoto(actor)
title = get_real_title(title)
cover_url = getCover(html_info) # 获取cover
outline = getOutline(html_info)
release = getRelease(html_info)
year = getYear(release)
tag = getTag(html_info)
mosaic = getMosaic(tag)
if mosaic == '无码':
image_cut = 'center'
studio = getStudio(html_info)
runtime = ''
score = ''
series = get_series(html_info)
director = ''
publisher = studio
extrafanart = get_extrafanart(html_info)
tag = tag.replace('无码片', '').replace('無碼片', '').replace('無修正', '')
try:
dic = {
'number': web_number,
'title': title,
'originaltitle': title,
'actor': actor,
'outline': outline,
'originalplot': outline,
'tag': tag,
'release': release,
'year': year,
'runtime': runtime,
'score': score,
'series': series,
'director': director,
'studio': studio,
'publisher': publisher,
'source': 'iqqtv',
'website': real_url,
'actor_photo': actor_photo,
'cover': cover_url,
'poster': '',
'extrafanart': extrafanart,
'trailer': '',
'image_download': image_download,
'image_cut': image_cut,
'log_info': log_info,
'error_info': '',
'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
'mosaic': mosaic,
'wanted': '',
}

debug_info = '数据获取成功!'
log_info += web_info + debug_info
dic['log_info'] = log_info
except Exception as e:
debug_info = '数据生成出错: %s' % str(e)
log_info += web_info + debug_info
raise Exception(debug_info)
title = get_title(html_info) # 获取标题
if not title:
debug_info = '数据获取失败: 未获取到title!'
log_info += web_info + debug_info
raise Exception(debug_info)
web_number = getWebNumber(title, number) # 获取番号,用来替换标题里的番号
title = title.replace(' %s' % web_number, '').strip()
actor = getActor(html_info) # 获取actor
actor_photo = getActorPhoto(actor)
title = get_real_title(title)
cover_url = getCover(html_info) # 获取cover
outline = getOutline(html_info)
release = getRelease(html_info)
year = getYear(release)
tag = getTag(html_info)
mosaic = getMosaic(tag)
if mosaic == '无码':
image_cut = 'center'
studio = getStudio(html_info)
runtime = ''
score = ''
series = get_series(html_info)
director = ''
publisher = studio
extrafanart = get_extrafanart(html_info)
tag = tag.replace('无码片', '').replace('無碼片', '').replace('無修正', '')
try:
dic = {
'number': web_number,
'title': title,
'originaltitle': title,
'actor': actor,
'outline': outline,
'originalplot': outline,
'tag': tag,
'release': release,
'year': year,
'runtime': runtime,
'score': score,
'series': series,
'director': director,
'studio': studio,
'publisher': publisher,
'source': 'iqqtv',
'website': real_url,
'actor_photo': actor_photo,
'cover': cover_url,
'poster': '',
'extrafanart': extrafanart,
'trailer': '',
'image_download': image_download,
'image_cut': image_cut,
'log_info': log_info,
'error_info': '',
'req_web': req_web + '(%ss) ' % (round((time.time() - start_time), )),
'mosaic': mosaic,
'wanted': '',
}

debug_info = '数据获取成功!'
log_info += web_info + debug_info
dic['log_info'] = log_info
except Exception as e:
debug_info = '数据生成出错: %s' % str(e)
log_info += web_info + debug_info
raise Exception(debug_info)

except Exception as e:
debug_info = str(e)
Expand Down Expand Up @@ -343,4 +342,7 @@ def main(number, appoint_url='', log_info='', req_web='', language='zh_cn'):
# print(main('LUXU-1217', ''))
# print(main('aldn-334', '')) # 存在系列字段
# print(main('ssni-200', '')) # 存在多个搜索结果
print(main('START-104', '')) # 简介存在无效信息 "*根据分发方式,内容可能会有所不同"
# print(main('START-104', language='zh_tw')) # 简介存在无效信息 "*根据分发方式,内容可能会有所不同"
print(main('abs-141')) # 一个搜索结果
print(main('MIAB-204')) # 多个搜索结果
print(main('ABF-131', '')) # 无码破解