This repository has been archived by the owner on Feb 19, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathscraper.js
executable file
·93 lines (80 loc) · 2.04 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
'use strict';
var system = require('system');
var webpage = require('webpage');
var _ = require('lodash');
// Set to true to break into the debugger and echo the page's console output.
var debugMode = false;

// URL of the page to scrape: first command-line argument after the script name.
var url = system.args[1];

var page = webpage.create();

// Page settings: a 20000 resource timeout (PhantomJS interprets this in
// milliseconds), a desktop Firefox user-agent string, and no image loading.
page.settings.resourceTimeout = 20000;
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; rv:31.0) Gecko/20100101 Firefox/31.0';
page.settings.loadImages = false;

if (!debugMode) {
  //Suppress calltraces from page
  page.onError = function () {};
} else {
  // Break once in this script and once inside the page context.
  debugger;
  page.evaluateAsync(function() {
    debugger;
  });

  // Forward everything the page logs to this script's output, prefixed with '> '.
  page.onConsoleMessage = function(msg) {
    console.log('> ' + msg);
  };
}
// Open the target URL and report exactly one result:
//   - {error: 'unreachable'} when the page could not be loaded,
//   - the scraped value when scrapePage() finds one right away,
//   - otherwise the outcome of one more scrape after polling for load completion.
// NOTE(review): scrapePage() can throw on a duplicate swaggerUi and nothing here
// catches it -- confirm whether that case should be reported as an error result.
page.open(url, function(status) {
  if (status === 'fail') {
    returnValue({error: 'unreachable'});
    // Stop here: nothing below should run once a result has been reported
    // (execution is not guaranteed to halt inside phantom.exit()).
    return;
  }

  var swagger = scrapePage();
  if (swagger) {
    returnSwagger(swagger);
    // Already reported; do not arm the retry timer.
    return;
  }

  /* Nothing found yet: poll every 2000 ms, at most 5 times, until the document
     reports it has finished loading, then scrape one final time. */
  var tries = 5;
  var intervalId = setInterval(function () {
    if (!isLoadingComplete() && --tries > 0)
      return;

    clearInterval(intervalId);
    returnSwagger(scrapePage());
  }, 2000);
});
/**
 * Report the outcome of a scrape attempt through returnValue().
 *
 * Emits exactly one result: {error: 'not_swagger'} when nothing was found,
 * otherwise {swagger: <value>}.
 *
 * @param {*} swagger - value produced by scrapePage(); any falsy value means
 *     no Swagger UI was found.
 */
function returnSwagger(swagger) {
  if (!swagger) {
    returnValue({error: 'not_swagger'});
    // Without this return the success result below would be reported as well.
    return;
  }

  returnValue({swagger: swagger});
}
/**
 * Print a result as a single line of JSON and ask PhantomJS to exit.
 *
 * The scraped page's URL is attached to the given object (the object is
 * modified in place) before it is serialized.
 *
 * @param {Object} value - result object to report.
 */
function returnValue(value) {
  value.url = url;

  var serialized = JSON.stringify(value);
  console.log(serialized);

  phantom.exit();
}
/**
 * Ask the page whether its document has finished loading.
 *
 * The check runs inside the page via page.evaluate(), so the callback must be
 * self-contained. It also logs the current readyState from the page context.
 *
 * @returns {boolean} true when document.readyState is 'complete'.
 */
function isLoadingComplete() {
  var checkReadyState = function() {
    var state = document.readyState;
    console.log('state: ' + state);
    return state === 'complete';
  };

  return page.evaluate(checkReadyState);
}
/**
 * Look for a Swagger UI in the current frame and in each of its direct child
 * frames.
 *
 * @returns {*} the value found by scrapeSwaggerUi(), or a falsy value when no
 *     frame yields one.
 * @throws {Error} 'duplicate swaggerUi' when more than one frame yields a value.
 */
function scrapePage() {
  var swagger = scrapeSwaggerUi();

  for (var i = 0; i < page.framesCount; ++i) {
    var result;

    page.switchToChildFrame(i);
    try {
      result = scrapeSwaggerUi();
    } finally {
      // Always step back out, so the next iteration (and the caller) sees the
      // original frame even if the scrape itself throws.
      page.switchToParentFrame();
    }

    if (result) {
      if (swagger)
        throw new Error('duplicate swaggerUi');
      swagger = result;
    }
  }

  return swagger;
}
/**
 * Read the spec location from the Swagger UI instance in the current frame.
 *
 * Runs inside the page via page.evaluate(), so the callback must not reference
 * anything from this script's scope. Progress is logged from the page context.
 *
 * @returns {*} options.url, or options.discoveryUrl when url is falsy; nothing
 *     when the frame has no window.swaggerUi.
 */
function scrapeSwaggerUi() {
  var readSpecLocation = function() {
    var ui = window.swaggerUi;

    if (ui) {
      var opts = ui.options;
      console.log('options:' + JSON.stringify(opts));
      return opts.url || opts.discoveryUrl;
    }

    console.log('missing swaggerUi');
  };

  return page.evaluate(readSpecLocation);
}