From 5b0bb19ed15b74feb11a0b6ffaf1f937e147ab8d Mon Sep 17 00:00:00 2001 From: laki9 Date: Tue, 18 Apr 2017 13:56:55 +0300 Subject: [PATCH] add phantomjs proxy support --- pyspider/fetcher/phantomjs_fetcher.js | 6 ++++++ pyspider/libs/base_handler.py | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pyspider/fetcher/phantomjs_fetcher.js b/pyspider/fetcher/phantomjs_fetcher.js index 9d8493a53..43f356072 100644 --- a/pyspider/fetcher/phantomjs_fetcher.js +++ b/pyspider/fetcher/phantomjs_fetcher.js @@ -48,6 +48,12 @@ if (system.args.length !== 2) { // create and set page var page = webpage.create(); + if (fetch.proxy) { + if (fetch.proxy.indexOf('://') == -1){ + fetch.proxy = 'http://' + fetch.proxy + } + page.setProxy(fetch.proxy); + } page.onConsoleMessage = function(msg) { console.log('console: ' + msg); }; diff --git a/pyspider/libs/base_handler.py b/pyspider/libs/base_handler.py index d18b98de8..d0f669ac8 100644 --- a/pyspider/libs/base_handler.py +++ b/pyspider/libs/base_handler.py @@ -329,10 +329,6 @@ def _crawl(self, url, **kwargs): if self.is_debugger(): task = self.task_join_crawl_config(task, self.crawl_config) - if task['fetch'].get('proxy', False) and task['fetch'].get('fetch_type', None) in ('js', 'phantomjs') \ - and not hasattr(self, '_proxy_warning'): - self.logger.warning('phantomjs does not support specify proxy from script, use phantomjs args instead') - self._proxy_warning = True cache_key = "%(project)s:%(taskid)s" % task if cache_key not in self._follows_keys: