-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtest.py
39 lines (29 loc) · 1.6 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import log
import traceback
from server.spiders.content import crawl_all_sections_articles, crawl_all_sections_videos
from server.spiders.comment import crawl_content_latest_comments, CommentSpider
from server.schedule import crawl_all_content_latest_comment, crawl_all_content_latest_comment_thread, crawl_section_content_latest_comment_thread
from time import time
from server import server
from config import ARTICLE_SECTIONS, VIDEO_SECTIONS, contentTypes
# Manual driver script: each commented-out call below is an example
# invocation of one of the imported crawl helpers; only the final
# statement is currently active.
# NOTE(review): the active call runs at import time because it sits at
# module top level — consider an `if __name__ == "__main__":` guard.

# Fetch the most recent 10 pages of content
# crawl_all_sections_articles(ARTICLE_SECTIONS, total_page=10)
# crawl_all_sections_videos(VIDEO_SECTIONS, total_page=10)
# Fetch content relative to the given date (the original note said
# "content before this"; the argument is named min_published_date —
# TODO confirm which direction the cutoff applies)
# crawl_all_sections_videos(VIDEO_SECTIONS, min_published_date='2018-11-17 00:00:00')
# crawl_all_sections_articles(ARTICLE_SECTIONS, min_published_date='2018-11-17 00:00:00')
# Fetch articles with new activity
# crawl_all_sections_articles(ARTICLE_SECTIONS, article_order_type=1, is_get_latest_comment=True)
# Fetch all comments for one content item
# CommentSpider(content_id=337855, crawl_all=True).crawl_comments()
# Crawl comments within a time range
# CommentSpider(content_id=4667805, min_comment_time='2018-10-26 12:00:00').crawl_comments()
# Crawl article comments that have not been crawled yet
# crawl_content_latest_comments(ARTICLE_SECTIONS[0], contentTypes['article'])
# Crawl video comments that have not been crawled yet
# crawl_content_latest_comments(VIDEO_SECTIONS[1], contentTypes['video'])
# Schedule: crawl all of the latest comments
# crawl_all_content_latest_comment()
# crawl_all_content_latest_comment_thread()
# Crawl the latest comments for the article sections
crawl_section_content_latest_comment_thread(ARTICLE_SECTIONS, contentTypes['article'])