diff --git a/SpiderKeeper/app/proxy/contrib/scrapy.py b/SpiderKeeper/app/proxy/contrib/scrapy.py index 9acad39e..8a2ff769 100644 --- a/SpiderKeeper/app/proxy/contrib/scrapy.py +++ b/SpiderKeeper/app/proxy/contrib/scrapy.py @@ -1,4 +1,5 @@ -import datetime, time +import datetime +import time import requests @@ -31,7 +32,9 @@ def get_project_list(self): def delete_project(self, project_name): post_data = dict(project=project_name) - data = request("post", self._scrapyd_url() + "/delproject.json", data=post_data, return_type="json") + data = request( + "post", self._scrapyd_url() + "/delproject.json", data=post_data, return_type="json" + ) return True if data and data['status'] == 'ok' else False def get_spider_list(self, project_name): @@ -57,22 +60,32 @@ def get_job_list(self, project_name, spider_status=None): for item in data[self.spider_status_name_dict[_status]]: start_time, end_time = None, None if item.get('start_time'): - start_time = datetime.datetime.strptime(item['start_time'], '%Y-%m-%d %H:%M:%S.%f') + start_time = datetime.datetime.strptime( + item['start_time'], '%Y-%m-%d %H:%M:%S.%f' + ) if item.get('end_time'): - end_time = datetime.datetime.strptime(item['end_time'], '%Y-%m-%d %H:%M:%S.%f') - result[_status].append(dict(id=item['id'], start_time=start_time, end_time=end_time)) + end_time = datetime.datetime.strptime( + item['end_time'], '%Y-%m-%d %H:%M:%S.%f' + ) + result[_status].append( + dict(id=item['id'], start_time=start_time, end_time=end_time) + ) return result if not spider_status else result[spider_status] def start_spider(self, project_name, spider_name, arguments): post_data = dict(project=project_name, spider=spider_name) post_data.update(arguments) - data = request("post", self._scrapyd_url() + "/schedule.json", data=post_data, return_type="json") + data = request( + "post", self._scrapyd_url() + "/schedule.json", data=post_data, return_type="json" + ) return data['jobid'] if data and data['status'] == 'ok' else None def 
cancel_spider(self, project_name, job_id): post_data = dict(project=project_name, job=job_id) - data = request("post", self._scrapyd_url() + "/cancel.json", data=post_data, return_type="json") - return data != None + data = request( + "post", self._scrapyd_url() + "/cancel.json", data=post_data, return_type="json" + ) + return data is not None def deploy(self, project_name, file_path): with open(file_path, 'rb') as f: diff --git a/SpiderKeeper/app/proxy/spiderctrl.py b/SpiderKeeper/app/proxy/spiderctrl.py index de01ea65..0c81741c 100644 --- a/SpiderKeeper/app/proxy/spiderctrl.py +++ b/SpiderKeeper/app/proxy/spiderctrl.py @@ -1,9 +1,9 @@ import datetime import random -from functools import reduce from SpiderKeeper.app import db -from SpiderKeeper.app.spider.model import SpiderStatus, JobExecution, JobInstance, Project, JobPriority +from SpiderKeeper.app.spider.model import SpiderStatus, JobExecution, JobInstance, Project, \ + JobPriority class SpiderServiceProxy(object): @@ -87,7 +87,8 @@ def delete_project(self, project): spider_service_instance.delete_project(project.project_name) def get_spider_list(self, project): - spider_instance_list = self.spider_service_instances[0].get_spider_list(project.project_name) + spider_instance_list = self.spider_service_instances[0]\ + .get_spider_list(project.project_name) for spider_instance in spider_instance_list: spider_instance.project_id = project.id return spider_instance_list @@ -119,7 +120,7 @@ def sync_job_status(self, project): db.session.commit() def start_spider(self, job_instance): - project = Project.find_project_by_id(job_instance.project_id) + project = Project.query.get(job_instance.project_id) spider_name = job_instance.spider_name arguments = {} if job_instance.spider_arguments: @@ -154,7 +155,7 @@ def start_spider(self, job_instance): def cancel_spider(self, job_execution): job_instance = JobInstance.find_job_instance_by_id(job_execution.job_instance_id) - project = 
Project.find_project_by_id(job_instance.project_id) + project = Project.query.get(job_instance.project_id) for spider_service_instance in self.spider_service_instances: if spider_service_instance.server == job_execution.running_on: if spider_service_instance.cancel_spider(project.project_name, job_execution.service_job_execution_id): @@ -171,11 +172,13 @@ def deploy(self, project, file_path): def log_url(self, job_execution): job_instance = JobInstance.find_job_instance_by_id(job_execution.job_instance_id) - project = Project.find_project_by_id(job_instance.project_id) + project = Project.query.get(job_instance.project_id) for spider_service_instance in self.spider_service_instances: if spider_service_instance.server == job_execution.running_on: - return spider_service_instance.log_url(project.project_name, job_instance.spider_name, - job_execution.service_job_execution_id) + return spider_service_instance.log_url( + project.project_name, job_instance.spider_name, + job_execution.service_job_execution_id + ) @property def servers(self): diff --git a/SpiderKeeper/app/schedulers/common.py b/SpiderKeeper/app/schedulers/common.py index 595518a0..10d10f7e 100644 --- a/SpiderKeeper/app/schedulers/common.py +++ b/SpiderKeeper/app/schedulers/common.py @@ -1,7 +1,6 @@ -import threading import time -from SpiderKeeper.app import scheduler, app, agent, db +from SpiderKeeper.app import scheduler, app, agent from SpiderKeeper.app.spider.model import Project, JobInstance, SpiderInstance @@ -51,7 +50,9 @@ def reload_runnable_spider_job_execution(): available_job_ids = set() # add new job to schedule for job_instance in JobInstance.query.filter_by(enabled=0, run_type="periodic").all(): - job_id = "spider_job_%s:%s" % (job_instance.id, int(time.mktime(job_instance.date_modified.timetuple()))) + job_id = "spider_job_%s:%s" % ( + job_instance.id, int(time.mktime(job_instance.date_modified.timetuple())) + ) available_job_ids.add(job_id) if job_id not in running_job_ids: 
scheduler.add_job(run_spider_job, @@ -67,8 +68,11 @@ def reload_runnable_spider_job_execution(): max_instances=999, misfire_grace_time=60 * 60, coalesce=True) - app.logger.info('[load_spider_job][project:%s][spider_name:%s][job_instance_id:%s][job_id:%s]' % ( - job_instance.project_id, job_instance.spider_name, job_instance.id, job_id)) + app.logger.info( + '[load_spider_job][project:%s][spider_name:%s][job_instance_id:%s][job_id:%s]' % ( + job_instance.project_id, job_instance.spider_name, job_instance.id, job_id + ) + ) # remove invalid jobs for invalid_job_id in filter(lambda job_id: job_id.startswith("spider_job_"), running_job_ids.difference(available_job_ids)): diff --git a/SpiderKeeper/app/spider/controller.py b/SpiderKeeper/app/spider/controller.py index 592b2768..78ba19de 100644 --- a/SpiderKeeper/app/spider/controller.py +++ b/SpiderKeeper/app/spider/controller.py @@ -59,7 +59,6 @@ class SpiderCtrl(flask_restful.Resource): "dataType": 'int' }]) def get(self, project_id): - project = Project.find_project_by_id(project_id) return [spider_instance.to_dict() for spider_instance in SpiderInstance.query.filter_by(project_id=project_id).all()] @@ -452,7 +451,7 @@ def inject_project(): project = Project.query.first() session['project_id'] = project.id if session.get('project_id'): - project_context['project'] = Project.find_project_by_id(session['project_id']) + project_context['project'] = Project.query.get(session['project_id']) project_context['spider_list'] = [spider_instance.to_dict() for spider_instance in SpiderInstance.query.filter_by(project_id=session['project_id']).all()] else: @@ -517,7 +516,7 @@ def project_create(): @app.route("/project//delete") def project_delete(project_id): - project = Project.find_project_by_id(project_id) + project = Project.query.get(project_id) agent.delete_project(project) db.session.delete(project) db.session.commit() @@ -536,7 +535,6 @@ def job_dashboard(project_id): @app.route("/project//job/periodic") def 
job_periodic(project_id): - project = Project.find_project_by_id(project_id) job_instance_list = [job_instance.to_dict() for job_instance in JobInstance.query.filter_by(run_type="periodic", project_id=project_id).all()] return render_template("job_periodic.html", @@ -545,7 +543,6 @@ def job_periodic(project_id): @app.route("/project//job/add", methods=['post']) def job_add(project_id): - project = Project.find_project_by_id(project_id) job_instance = JobInstance() job_instance.spider_name = request.form['spider_name'] job_instance.project_id = project_id @@ -602,9 +599,9 @@ def job_run(project_id, job_instance_id): return redirect(request.referrer, code=302) -@app.route("/project//job//remove") -def job_remove(project_id, job_instance_id): - job_instance = JobInstance.query.filter_by(project_id=project_id, id=job_instance_id).first() +@app.route("/job//remove") +def job_remove(job_instance_id): + job_instance = JobInstance.query.get(job_instance_id) db.session.delete(job_instance) db.session.commit() return redirect(request.referrer, code=302) @@ -627,13 +624,12 @@ def spider_dashboard(project_id): @app.route("/project//spider/deploy") def spider_deploy(project_id): - project = Project.find_project_by_id(project_id) return render_template("spider_deploy.html") @app.route("/project//spider/upload", methods=['post']) def spider_egg_upload(project_id): - project = Project.find_project_by_id(project_id) + project = Project.query.get(project_id) if 'file' not in request.files: flash('No file part') return redirect(request.referrer) @@ -654,13 +650,11 @@ def spider_egg_upload(project_id): @app.route("/project//project/stats") def project_stats(project_id): - project = Project.find_project_by_id(project_id) run_stats = JobExecution.list_run_stats_by_hours(project_id) return render_template("project_stats.html", run_stats=run_stats) @app.route("/project//server/stats") def service_stats(project_id): - project = Project.find_project_by_id(project_id) run_stats = 
JobExecution.list_run_stats_by_hours(project_id) return render_template("server_stats.html", run_stats=run_stats) diff --git a/SpiderKeeper/app/spider/model.py b/SpiderKeeper/app/spider/model.py index 5376602b..5e01c3b7 100644 --- a/SpiderKeeper/app/spider/model.py +++ b/SpiderKeeper/app/spider/model.py @@ -1,5 +1,6 @@ import datetime from sqlalchemy import desc +from sqlalchemy.orm import relation from SpiderKeeper.app import db, Base @@ -16,10 +17,6 @@ def load_project(cls, project_list): db.session.add(project) db.session.commit() - @classmethod - def find_project_by_id(cls, project_id): - return Project.query.filter_by(id=project_id).first() - def to_dict(self): return { "project_id": self.id, @@ -36,8 +33,9 @@ class SpiderInstance(Base): @classmethod def update_spider_instances(cls, project_id, spider_instance_list): for spider_instance in spider_instance_list: - existed_spider_instance = cls.query.filter_by(project_id=project_id, - spider_name=spider_instance.spider_name).first() + existed_spider_instance = cls.query.filter_by( + project_id=project_id, spider_name=spider_instance.spider_name + ).first() if not existed_spider_instance: db.session.add(spider_instance) db.session.commit() @@ -152,7 +150,9 @@ class JobExecution(Base): project_id = db.Column(db.INTEGER, nullable=False, index=True) service_job_execution_id = db.Column(db.String(50), nullable=False, index=True) - job_instance_id = db.Column(db.INTEGER, nullable=False, index=True) + job_instance_id = db.Column(db.INTEGER, + db.ForeignKey('sk_job_instance.id'), nullable=False, index=True) + job_instance = relation(JobInstance) create_time = db.Column(db.DATETIME) start_time = db.Column(db.DATETIME) end_time = db.Column(db.DATETIME) @@ -160,7 +160,6 @@ class JobExecution(Base): running_on = db.Column(db.Text) def to_dict(self): - job_instance = JobInstance.query.filter_by(id=self.job_instance_id).first() return { 'project_id': self.project_id, 'job_execution_id': self.id, @@ -171,7 +170,7 @@ def 
to_dict(self): 'end_time': self.end_time.strftime('%Y-%m-%d %H:%M:%S') if self.end_time else None, 'running_status': self.running_status, 'running_on': self.running_on, - 'job_instance': job_instance.to_dict() if job_instance else {} + 'job_instance': self.job_instance.to_dict() if self.job_instance else {} } @classmethod diff --git a/SpiderKeeper/app/templates/job_dashboard.html b/SpiderKeeper/app/templates/job_dashboard.html index 494b93d7..52e33ed6 100644 --- a/SpiderKeeper/app/templates/job_dashboard.html +++ b/SpiderKeeper/app/templates/job_dashboard.html @@ -10,7 +10,7 @@

Job Dashboard

top: 15px; right: 10px;"> {% endblock %} diff --git a/SpiderKeeper/app/templates/job_periodic.html b/SpiderKeeper/app/templates/job_periodic.html index beeb9b94..2811f8aa 100644 --- a/SpiderKeeper/app/templates/job_periodic.html +++ b/SpiderKeeper/app/templates/job_periodic.html @@ -79,8 +79,8 @@

Periodic jobs (Spiders)

Run - Remove + + Remove {% endfor %} diff --git a/SpiderKeeper/app/util/__init__.py b/SpiderKeeper/app/util/__init__.py index 8dd75172..f8d0d14d 100644 --- a/SpiderKeeper/app/util/__init__.py +++ b/SpiderKeeper/app/util/__init__.py @@ -1,4 +1,5 @@ def project_path(): - import inspect, os + import os + import inspect this_file = inspect.getfile(inspect.currentframe()) - return os.path.abspath(os.path.dirname(this_file)+'/../') \ No newline at end of file + return os.path.abspath(os.path.dirname(this_file)+'/../') diff --git a/SpiderKeeper/app/util/http.py b/SpiderKeeper/app/util/http.py index ecc8f441..c0e81386 100644 --- a/SpiderKeeper/app/util/http.py +++ b/SpiderKeeper/app/util/http.py @@ -47,8 +47,10 @@ def request(request_type, url, data=None, retry_times=5, return_type="text"): res = request_get(url, retry_times) if request_type == 'post': res = request_post(url, data, retry_times) - if not res: return res - if return_type == 'text': return res.text + if not res: + return res + if return_type == 'text': + return res.text if return_type == 'json': try: res = res.json()