import requests
import re
import os
import time
# User-Agent header sent with every request made by this script.
headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# --- Fetch the page ---
# Alternative page:
# response = requests.get("https://www.vmgirls.com/15198.html", headers=headers, timeout=30)
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get("https://www.vmgirls.com/15270.html", headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
html = response.text
# print(html)

# --- Parse the page ---
# The post heading is used as the output directory name. [-1] raises
# IndexError if the page contains no matching <h1>.
# Alternative source for the name:
# dir_name = re.findall('<title>(.*?)</title>', html)[-1]
dir_name = re.findall('<h1 class="post-title h1">(.*?)</h1>', html)[-1]
# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(dir_name, exist_ok=True)
urls = re.findall('<a href="(.*?)" alt=".*?" title=".*?">', html)
print(urls)

# --- Save the images ---
for url in urls:
    time.sleep(2)  # pause between downloads
    file_name = url.split('/')[-1]  # last path segment becomes the file name
    # The captured hrefs are prefixed with "https:" before fetching
    # (presumably they are protocol-relative, e.g. "//host/path" - confirm).
    print("https:" + url)
    image_response = requests.get("https:" + url, headers=headers, timeout=30)
    if not image_response.ok:
        # Do not write an HTTP error body to disk as if it were an image.
        print("skipped (HTTP {}): https:{}".format(image_response.status_code, url))
        continue
    # The with-statement closes the file; no explicit close() is needed.
    with open(os.path.join(dir_name, file_name), 'wb') as f:
        f.write(image_response.content)
# Walk a directory tree and list its files as Markdown links
def walkFile(file):
    """Return Markdown bullet links for every file found under *file*.

    The directory tree rooted at *file* is walked with ``os.walk`` and one
    entry of the form ``* [name](计算机网络/filename)`` is produced per file,
    where ``name`` is the file name with its final extension removed.

    The extension is stripped with ``os.path.splitext`` so that names
    without a dot (``README``) no longer raise ``IndexError`` and names
    with several dots (``ch1.intro.md``) keep their full stem.

    NOTE: the link target always uses the fixed ``计算机网络/`` prefix and the
    bare file name; the subdirectory a file was found in is not included.

    Args:
        file: Path of the directory to walk.

    Returns:
        A list of Markdown link lines, in the order ``os.walk`` yields files.
    """
    entries = []
    for _root, _dirs, files in os.walk(file):
        for filename in files:
            stem = os.path.splitext(filename)[0]
            entries.append("* [{}](计算机网络/{})".format(stem, filename))
    return entries
if __name__ == '__main__':
    # Raw string: the Windows path contains backslash sequences such as
    # "\z" and "\D" that are invalid escapes in a normal literal and trigger
    # a SyntaxWarning on recent Python versions. The resulting value is
    # identical to the original literal.
    notes_dir = r"C:\Users\zpparts\Desktop\SoleilNotes\计算机网络"
    # Print one Markdown link line per file found under the notes directory.
    for entry in walkFile(notes_dir):
        print(entry)