-
Notifications
You must be signed in to change notification settings - Fork 0
/
ithome.py
128 lines (98 loc) · 2.99 KB
/
ithome.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# coding=utf-8
import requests
import urllib.request
import io
import sys
import pymysql
from bs4 import BeautifulSoup
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# Open a database connection; the caller obtains a cursor from the returned handle.
def connectDatabase():
    # NOTE(review): credentials (host/user/passwd) are hard-coded in source —
    # move them to environment variables or a config file and rotate this
    # password; never commit secrets. TODO confirm with the repo owner.
    db = pymysql.connect(host='47.99.54.53', port=3306, user='root', passwd='Dan05174530~', db='ithouse',
                         charset='utf8')
    # Create a cursor at the call site with db.cursor() when needed.
    # cursor = db.cursor()
    return db
# (Re)create the `newslist` table.
def createDatabase():
    """Drop any existing `newslist` table and create a fresh one.

    Side effects: destroys all previously stored rows. The connection is
    always closed, even on error (the original leaked it).
    """
    db = connectDatabase()
    try:
        cursor = db.cursor()
        # Drop first so the schema below is authoritative on every run.
        cursor.execute("DROP TABLE IF EXISTS newslist")
        sql = """CREATE TABLE newslist (
         title CHAR(200),
         detail_href CHAR(200),
         pic CHAR(100),
         time CHAR(100),
         comment_num CHAR(100))"""
        cursor.execute(sql)
        # DDL auto-commits in MySQL, but commit explicitly for clarity/portability.
        db.commit()
    finally:
        # Fix: the original never closed the connection.
        db.close()
# Replace the contents of the `newslist` table with the given rows.
def insertData2Newslist(dataList):
    """Atomically replace all rows in `newslist` with `dataList`.

    Args:
        dataList: iterable of 5-tuples
            (title, detail_href, pic, time, comment_num) matching the
            INSERT column order below.

    Fixes vs. original:
    - DELETE and INSERT now share one transaction; the original committed
      the DELETE first, so a failed INSERT left the table empty.
    - bare `except:` narrowed to `except Exception` (no longer swallows
      SystemExit/KeyboardInterrupt).
    - the connection is closed on every path via `finally`.
    """
    db = connectDatabase()
    try:
        cursor = db.cursor()
        try:
            # Clear the table, then bulk-insert — committed together.
            cursor.execute("DELETE FROM newslist")
            sql = "INSERT INTO newslist(title,detail_href,pic,time,comment_num) VALUES (%s,%s,%s,%s,%s)"
            cursor.executemany(sql, dataList)
            db.commit()
        except Exception:
            # Roll back the whole replace and report the error.
            db.rollback()
            info = sys.exc_info()
            print(info[0], ":", info[1])
    finally:
        db.close()
def getithome():
    """Scrape the ithome.com front page and persist the headline list.

    Fetches the page, extracts each `.new` item's date, title text and
    link, and stores them via insertData2Newslist(). Network I/O; no
    return value.
    """
    url = "https://www.ithome.com/"
    html = urllib.request.urlopen(url).read()
    # Some pages contain stray bytes; drop undecodable sequences.
    htmlUtf8 = html.decode("utf-8", "ignore")
    soup = BeautifulSoup(htmlUtf8, 'lxml')
    # Fix: the original named this `list`, shadowing the builtin.
    rows = []
    for item in soup.select(".new"):
        time = item.select(".date")[0].get_text()
        content = item.select(".title > a")[0].get_text()
        href = item.select(".title > a")[0].get('href')
        # NOTE(review): picture URL and comment count are hard-coded
        # placeholders — TODO scrape the real values from the page.
        rows.append((content, href, "www.baidu.com/pic", time, 199))
    # Persist the scraped rows.
    insertData2Newslist(rows)
def main():
    """Entry point: scrape the headline list and store it in the database."""
    getithome()
# Fix: guard the entry point so importing this module does not trigger
# the scrape + database write as a side effect.
if __name__ == "__main__":
    main()