-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
253 lines (222 loc) · 9.96 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# -*- coding: utf-8 -*-
import json
import os
import re
import urllib.request
from urllib import parse
import time
# 쓰레드, 큐를 위한 라이브러리 추가
import multiprocessing as mp
from threading import Thread
from bs4 import BeautifulSoup
from slackclient import SlackClient
from flask import Flask, request, make_response, render_template
from selenium import webdriver
from operator import itemgetter
from requests.sessions import Session
# Flask application object; the route decorators in this file register on it.
app = Flask(__name__)
# Age-group labels accepted as the first command; processing_function compares
# the user's text against this list and getKeywords matches it against the
# panel headings on the crawled page.
generations = ['전체 연령대', '10대', '20대', '30대', '40대', '50대']
# Slack credentials. The values below are placeholders and must be replaced
# with real ones before the bot can talk to Slack.
slack_token = '__your_token__'
slack_client_id = '__your_client_id__'
slack_client_secret = '__your_client_secret__'
slack_verification = '__your_verification__'
# Slack Web API client used to post messages back to the channel.
sc = SlackClient(slack_token)
# Keyword list produced by the most recent age-group query; shared across all
# requests through the `global` statement in processing_function.
global_words = []
# Recently seen client_msg_id values, used by processing_event to ignore
# duplicate deliveries of the same message.
client_msg_id_history = []
def urlRequest(url_str):
    """Open ``url_str`` and return the complete response body as bytes.

    The response object is closed before returning. Any error raised by
    ``urllib.request.urlopen`` propagates to the caller.
    """
    response = urllib.request.urlopen(url_str)
    with response:
        payload = response.read()
    return payload
def parseHtml(site):
    """Parse ``site`` (HTML markup) and return the resulting BeautifulSoup tree.

    The standard-library ``html.parser`` backend is always used.
    """
    document = BeautifulSoup(site, 'html.parser')
    return document
def navKeywordsURL(parsed_html):
    """Return the ``href`` of the DataLab link inside the hot-keyword area.

    ``parsed_html`` must offer a BeautifulSoup-style ``find``. The first
    ``div`` whose class matches ``area_hotkeyword.*`` is located, then the
    first ``a.ah_ha`` inside it whose href matches the DataLab URL pattern.

    Raises ``AttributeError`` / ``TypeError`` when either element is absent,
    because the missing lookup yields ``None``.
    """
    hotkeyword_class = re.compile('area_hotkeyword.*')
    datalab_href = re.compile('https?://datalab.naver.com/.*')

    hotkeyword_area = parsed_html.find('div', class_=hotkeyword_class)
    datalab_link = hotkeyword_area.find('a', class_='ah_ha', href=datalab_href)
    return datalab_link['href']
def navKeywordsCrawling(site, type):
    """Extract the keyword titles listed under one ranking panel.

    Args:
        site: HTML markup of the ranking page (anything ``parseHtml`` accepts).
        type: Heading text of the wanted panel; it is compared for exact
            equality with each panel's ``strong.rank_title.v2`` text.
            (The name shadows the builtin but is kept for caller
            compatibility.)

    Returns:
        A ``(keywords, type)`` tuple. ``keywords`` holds the text of every
        ``span.title`` found in the first matching panel, in document order,
        and is empty when no panel matches. ``type`` is returned unchanged.
    """
    soup = parseHtml(site)

    matched_panel = None
    for ranking in soup.find_all('div', class_='keyword_rank'):
        heading = ranking.find('strong', class_='rank_title v2')
        # A panel without a heading can never match; skip it instead of
        # failing on ``None.get_text()``.
        if heading is not None and heading.get_text() == type:
            matched_panel = ranking
            break

    keywords = []
    if matched_panel is not None:
        for item in matched_panel.find_all('li', class_='list'):
            title = item.find('span', class_='title')
            # Ignore list items that carry no title span.
            if title is not None:
                keywords.append(title.get_text())
    return keywords, type
def youtubeCrawling(query_word):
    """Search YouTube for ``query_word`` and return scraped video rows.

    Only the first ten ``div.yt-lockup-content`` results are considered, and
    results whose link contains ``'list'`` (playlists) are skipped. Each kept
    result costs one extra HTTP request to its watch page to read the upload
    date, so the call is sequential and network-bound.

    Args:
        query_word: Search phrase; it is URL-quoted before being sent.

    Returns:
        A list of 7-item lists sorted by view count (``get_freq``), highest
        first. Item layout:
        ``[url, first-span text, channel link text, upload date, duration,
        first token of the meta line, view count string]``.
        The upload date is ``''`` when it cannot be found on the watch page.
        Results that cannot be parsed completely are left out, so every row
        has the full layout.

    Raises:
        Whatever ``urllib.request.urlopen`` raises on network failure.
    """
    title_link_class = "yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link "
    search_url = "https://www.youtube.com/results?search_query=" + parse.quote(query_word)
    with urllib.request.urlopen(search_url) as response:
        soup = BeautifulSoup(response.read(), "html.parser")

    list_href = []
    for result in soup.find_all("div", class_="yt-lockup-content")[:10]:
        try:
            href = result.find("a", class_=title_link_class).get("href")
            if 'list' in href:
                continue
            meta_tokens = result.find("ul", class_="yt-lockup-meta-info").get_text().split()
            # Build the whole row first so a parse failure halfway through
            # cannot leave a truncated row in the result.
            entry = [
                "https://www.youtube.com" + href,
                result.find("span").get_text(),
                result.find("a", class_="yt-uix-sessionlink spf-link ").get_text(),
                result.find("span", class_="accessible-description").get_text().split('길이: ')[1],
                meta_tokens[0],
                # Drop the trailing unit character from the view count.
                meta_tokens[2][:-1],
            ]
        except (AttributeError, TypeError, IndexError):
            # A missing element (None) or an unexpected text layout: skip
            # this result and keep going (best-effort scraping).
            continue
        list_href.append(entry)

    for entry in list_href:
        with urllib.request.urlopen(str(entry[0])) as response:
            watch_page = BeautifulSoup(response.read(), "html.parser")
        try:
            raw_date = str(watch_page.find_all("strong", class_="watch-time-text")).split(': ')[1]
            upload_date = raw_date.split('<')[0]
        except IndexError:
            upload_date = ''
        # The upload date sits between the channel name and the duration.
        entry.insert(3, upload_date)

    return sorted(list_href, key=get_freq, reverse=True)
def get_freq(list_href):
    """Return the view count stored in the last field of a video row.

    Used as the sort key in ``youtubeCrawling``.

    Args:
        list_href: A sequence whose last element is a string such as
            ``'1,234,567'``.

    Returns:
        The count as an ``int`` with thousands separators removed, or ``0``
        when the field is not a valid integer (empty, non-numeric text, ...).
    """
    digits = list_href[-1].replace(',', '')
    try:
        return int(digits)
    except ValueError:
        return 0
def getKeywords(type):
    """Return up to ten trending keywords for the age group ``type``.

    The Naver DataLab real-time list is loaded in a Selenium-driven Chrome
    so that the rendered page source can be parsed by
    ``navKeywordsCrawling``.

    Args:
        type: Age-group heading to look up (e.g. an entry of ``generations``).
            The name shadows the builtin but is kept for caller compatibility.

    Returns:
        A list of at most ten keyword strings; empty when the heading is not
        found on the page.
    """
    # NOTE(review): the chromedriver path is machine-specific.
    driver = webdriver.Chrome('C:\\Users\\student\\Desktop\\chromedriver_win32\\chromedriver')
    try:
        driver.implicitly_wait(2)
        driver.get('https://datalab.naver.com/keyword/realtimeList.naver?where=main')
        html = driver.page_source
    finally:
        # Always shut the browser down, even when loading the page fails,
        # so no Chrome process is left behind.
        driver.quit()

    keywords, _ = navKeywordsCrawling(html, type)
    return keywords[:10]
# Worker-thread entry point
def processing_event(queue):
    """Consume Slack events from ``queue`` forever and answer each one.

    For every event that carries a ``client_msg_id`` not seen recently, the
    message text is matched against ``<mention> <command>``. On a match the
    command is handed to ``processing_function`` and the resulting
    attachments are posted to the originating channel; otherwise a usage
    hint is posted. Events without ``client_msg_id``, duplicates and empty
    texts are ignored.

    Args:
        queue: Queue of Slack event payloads (dicts with an ``'event'`` key).

    This function never returns.
    """
    import logging

    global client_msg_id_history
    logger = logging.getLogger(__name__)
    help_text = '저는 연령대별로 급상승 검색어를 찾아드리고, 검색어에 따른 유튜브 동영상을 검색해드려요!\n알고싶은 연령대를 저를 태그해서 입력해주세요. e.g. @TAG 10대'

    while True:
        # Block until an event is available instead of polling
        # ``queue.empty()`` in a tight loop, which kept a CPU core busy.
        slack_event = queue.get()

        # Keep the duplicate-detection history small.
        if len(client_msg_id_history) > 20:
            client_msg_id_history.clear()

        try:
            event = slack_event['event']
            if 'client_msg_id' not in event:
                continue
            msg_id = event['client_msg_id']
            if msg_id in client_msg_id_history:
                # The same message was delivered again; already handled.
                continue
            client_msg_id_history.append(msg_id)

            channel = event["channel"]
            text = event["text"]
            if not text:
                continue

            # group(1) is the "<...>" mention token, group(2) the command.
            matching = re.search(r'(<\S+>) (.*)', text)
            # NOTE(review): the usage hint is sent when the text does not
            # match the mention pattern; confirm this pairing of the
            # original ``else`` branch.
            if matching:
                attachments_list = processing_function(matching.group(2))
                sc.api_call("chat.postMessage",
                            channel=channel,
                            text='',
                            attachments=attachments_list
                            )
            else:
                sc.api_call("chat.postMessage",
                            channel=channel,
                            text=help_text
                            )
        except Exception:
            # Thread boundary: one failing event (network error, malformed
            # payload, ...) must not terminate the worker for good.
            logger.exception("Failed to process Slack event")
# Chatbot command logic
def processing_function(text_msg):
    """Build the Slack attachments that answer one user command.

    * If ``text_msg`` is an age group from ``generations``, the trending
      keywords for it are crawled, remembered in the module-level
      ``global_words`` and returned as one list attachment.
    * Otherwise, if no keywords have been fetched yet, an attachment asking
      for an age group is returned.
    * Otherwise, if ``text_msg`` is one of the remembered keywords, YouTube
      is searched and one attachment per complete video row is returned.
    * Any other text yields an "invalid keyword" attachment.

    Args:
        text_msg: The command text following the bot mention.

    Returns:
        A list of attachment dicts for ``chat.postMessage`` (may be empty
        when the YouTube search yields no usable rows).
    """
    global global_words
    footer_icon = 'https://platform.slack-edge.com/img/default_application_icon.png'

    if text_msg in generations:
        global_words = getKeywords(text_msg)
        # The prompt line is for display only. It is deliberately kept out
        # of global_words so it can never be accepted as a search keyword.
        listing = global_words + ['위 키워드 중 하나를 입력해주세요.']
        return [{
            'color': '#36a64f',
            'pretext': '{}가 많이 검색하는 top 10 키워드입니다'.format(text_msg),
            'author_name': 'SSAFY_GUMI_3_YOUTUVER',
            'title': '키워드 목록',
            'text': "\n".join(listing),
            'footer': 'Youtuver',
            'footer_icon': footer_icon,
        }]

    if not global_words:
        return [{
            'color': '#ff2400',
            'text': '연령대를 입력해주세요. 50대까지 가능해요! e.g. 전체 연령대, 10대, 20대',
            'footer': 'Youtuver',
            'footer_icon': footer_icon,
        }]

    if text_msg not in global_words:
        return [{
            'color': '#ff2400',
            'text': '유효하지 않은 키워드입니다. 다시 검색해주세요',
            'footer': 'Youtuver',
            'footer_icon': footer_icon,
        }]

    attachments_list = []
    for youtubeEntity in youtubeCrawling(text_msg):
        try:
            channel_name = youtubeEntity[2]
            attachments_list.append({
                'color': '#36a64f',
                'title': youtubeEntity[1],
                'title_link': youtubeEntity[0],
                # Hide the author when the channel field is a playlist label.
                'author_name': '' if '재생목록' in channel_name else channel_name,
                'text': '업로드 날짜 : {} , 영상길이 : {} 조회수 : {}'.format(
                    youtubeEntity[3], youtubeEntity[4], youtubeEntity[6]),
            })
        except (IndexError, AttributeError, TypeError):
            # Row is incomplete or malformed; leave it out of the reply.
            continue
    return attachments_list
# Dispatches a Slack event to its handler
def _event_handler(event_type, slack_event):
    """Route one Slack event and return the HTTP response for it.

    ``app_mention`` events are placed on the module-level ``event_queue``
    for the worker thread and acknowledged immediately. Every other event
    type is acknowledged with an explanatory message and the
    ``X-Slack-No-Retry`` header, so the view always gets a valid response.

    Args:
        event_type: Value of ``slack_event["event"]["type"]``.
        slack_event: The full decoded event payload.

    Returns:
        A Flask response object.
    """
    if event_type == "app_mention":
        event_queue.put(slack_event)
        return make_response("App mention message has been sent", 200, )

    # No handler is registered for this event type.
    message = "You have not added an event handler for the %s" % event_type
    return make_response(message, 200, {"X-Slack-No-Retry": 1})
@app.route("/slackbot", methods=["GET", "POST"])
def hears():
    """Slack Events endpoint.

    Handles, in order: a non-JSON body (400), the URL-verification
    ``challenge`` handshake (echoed back), verification-token checking
    (403 on mismatch), event dispatch through ``_event_handler``, and
    finally a 404 for payloads that contain no event.
    """
    try:
        slack_event = json.loads(request.data)
    except ValueError:
        # Covers an empty body (e.g. a plain GET) and malformed JSON.
        return make_response("Request body is not valid JSON", 400,
                             {"X-Slack-No-Retry": 1})

    if "challenge" in slack_event:
        # NOTE(review): this sets a header literally named "content_type",
        # not "Content-Type"; kept as in the original.
        return make_response(slack_event["challenge"], 200, {"content_type":
                                                             "application/json"
                                                             })

    received_token = slack_event.get("token")
    if slack_verification != received_token:
        message = "Invalid Slack verification token: %s" % (received_token)
        # The response must be returned; otherwise requests with a wrong or
        # missing token would still be processed below.
        return make_response(message, 403, {"X-Slack-No-Retry": 1})

    if "event" in slack_event:
        event_type = slack_event["event"]["type"]
        return _event_handler(event_type, slack_event)

    # If our bot hears things that are not events we've subscribed to,
    # send a quirky but helpful error response
    return make_response("[NO EVENT IN SLACK REQUEST] These are not the droids "
                         "you're looking for.", 404, {"X-Slack-No-Retry": 1})
@app.route("/", methods=["GET"])
def index():
    """Serve a static HTML heading stating that the server is ready."""
    banner = "<h1>Server is ready.</h1>"
    return banner
if __name__ == '__main__':
    # Queue shared between the Flask request handlers (producers) and the
    # worker thread (consumer); _event_handler reads it as a module global.
    event_queue = mp.Queue()

    # The worker loops forever, so it runs as a daemon thread: once the web
    # server stops, the process can exit instead of waiting on a join that
    # would never return.
    p = Thread(target=processing_event, args=(event_queue,), daemon=True)
    p.start()
    print("subprocess started")

    app.run('0.0.0.0', port=8080)