-
Notifications
You must be signed in to change notification settings - Fork 0
/
ebkScraper.js
96 lines (83 loc) · 3.59 KB
/
ebkScraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// Generated by CoffeeScript 1.6.3
(function() {
// Forward declarations for the bindings assigned further down in this wrapper.
var EbkScraper, baseUrl, cheerio, request;

// Returns a wrapper that always calls `fn` with `me` as its receiver,
// forwarding whatever arguments the wrapper was called with.
var __bind = function(fn, me) {
  return function() {
    return fn.apply(me, arguments);
  };
};

// Borrowed so own-property checks work regardless of the object's prototype.
var __hasProp = {}.hasOwnProperty;

// Makes `child` inherit from `parent`: copies the parent's own enumerable
// (static) properties, chains the prototypes through a throwaway constructor
// so the parent constructor is not executed, and records the parent prototype
// on `child.__super__`. Returns `child`.
var __extends = function(child, parent) {
  var name;
  for (name in parent) {
    if (__hasProp.call(parent, name)) {
      child[name] = parent[name];
    }
  }
  function ctor() {
    this.constructor = child;
  }
  ctor.prototype = parent.prototype;
  child.prototype = new ctor();
  child.__super__ = parent.prototype;
  return child;
};

// Native Array#indexOf when the runtime has one, otherwise a linear scan with
// strict equality that skips holes. Meant to be invoked via `.call(list, item)`.
var __indexOf = [].indexOf || function(item) {
  var index = 0;
  var count = this.length;
  for (; index < count; index++) {
    if (index in this && this[index] === item) {
      return index;
    }
  }
  return -1;
};
cheerio = require('cheerio');
request = require('request');
baseUrl = "http://kleinanzeigen.ebay.de";
EbkScraper = (function(_super) {
__extends(EbkScraper, _super);
/**
 * Creates a scraper for one search-result page.
 *
 * @param {Object} opts Search configuration, stored as `this.opts`. The
 *   methods on the prototype read `targetUrl`, `district`, `postcode`,
 *   `netRent`, `rooms`, `size` and `regex` from it.
 */
function EbkScraper(opts) {
  // Run the parent constructor (the emitter class handed to the enclosing
  // wrapper) so the instance is initialised the way the parent expects,
  // instead of relying on it tolerating uninitialised instances.
  _super.call(this);
  this.opts = opts;
  // Both methods are pinned to this instance so they keep their receiver when
  // passed around detached, e.g. `_loadOffer` as the `request.get` callback.
  this._loadOffer = __bind(this._loadOffer, this);
  this.scrape = __bind(this.scrape, this);
}
/**
 * Starts a scrape run: requests the result page at `baseUrl` plus
 * `opts.targetUrl` and hands the response to `_loadOffer`.
 *
 * @returns {*} Whatever `request.get` returns.
 */
EbkScraper.prototype.scrape = function() {
  var pageUrl = baseUrl + this.opts.targetUrl;
  var onPageLoaded = this._loadOffer;
  return request.get(pageUrl, onPageLoaded);
};
/**
 * `request` callback for the search-result page. Parses the HTML, builds a
 * summary offer for every `#srchrslt-adtable > li` entry and passes it to
 * `_extOffer`; the enriched offer is then handed to `_checkOffer`.
 *
 * A failed request is logged and nothing is parsed. List entries without a
 * title link (no `href` to follow) are skipped instead of aborting the run.
 *
 * @param {?Error} err Request error, if any.
 * @param {Object} header Response object supplied by `request`; unused.
 * @param {string} body HTML of the result page.
 * @returns {Object|undefined} The iterated cheerio selection, or `undefined`
 *   when the request failed.
 */
EbkScraper.prototype._loadOffer = function(err, header, body) {
  var self = this;
  var $, htmlOf;

  if (err) {
    console.error(err);
    // No body to parse after a failed request.
    return;
  }

  $ = cheerio.load(body);

  // `.html()` yields null for an empty selection; normalise that to '' so the
  // string operations below cannot throw on incomplete entries.
  htmlOf = function(item, selector) {
    var markup = item.find(selector).html();
    return markup == null ? '' : String(markup);
  };

  return $('#srchrslt-adtable > li').each(function() {
    var item = $(this);
    var path = item.find('.ad-title').attr('href');
    var place, rentDigits;

    if (!path) {
      // Not a followable listing. Plain `return` continues with the next
      // entry; returning false would stop the iteration.
      return;
    }

    place = htmlOf(item, '.c-h-adtble-lctn');
    // Dots are stripped as thousands separators; `\d+` (not `\d*`) so text in
    // front of the amount does not produce an empty match.
    rentDigits = htmlOf(item, '.ad-listitem-details > strong').replace(/\./g, "").match(/\d+/);

    self._extOffer({
      titel: item.find('.ad-title').html(),
      insertTime: htmlOf(item, '.ad-listitem-addon').replace(/\s/g, ""),
      // NOTE(review): the offsets assume the location markup starts with a
      // 5-character postcode followed by a 4-character separator - confirm
      // against the current page markup.
      district: place.slice(9),
      postcode: place.slice(0, 5),
      netRent: rentDigits ? rentDigits[0] : '',
      href: baseUrl + path,
      subtitle: item.find('p').html()
    }, function(offer) {
      return self._checkOffer(offer);
    });
  });
};
/**
 * Fetches the detail page at `offer.href` and adds `street`, `insertDate`,
 * `id`, `rooms`, `size`, `description` and `phone` to the given offer object
 * (the offer is mutated in place), then passes it to `cb`.
 *
 * When the detail request fails the error is logged and `cb` is NOT invoked:
 * there is no page to read the details from, so the offer cannot be checked.
 *
 * @param {Object} offer Summary offer; must carry the absolute `href`.
 * @param {function(Object)} cb Receives the enriched offer.
 * @returns {*} Whatever `request.get` returns.
 */
EbkScraper.prototype._extOffer = function(offer, cb) {
  return request.get(offer.href, function(err, header, body) {
    var $, attrCells, cellAt;

    if (err) {
      console.error(err);
      return;
    }

    $ = cheerio.load(body);

    // Query the attribute list once; the cells are addressed by position.
    // NOTE(review): the index-to-field mapping below follows the original
    // code - confirm it still matches the page layout.
    attrCells = $('.c-attrlist > dd');
    cellAt = function(index) {
      return $(attrCells[index]);
    };

    offer.street = $('#street-address').text();
    offer.insertDate = cellAt(1).text();
    offer.id = cellAt(2).text();
    offer.rooms = cellAt(3).find('span').text().replace(/\s/g, "");
    offer.size = cellAt(4).find('span').text().replace(/\s/g, "");
    offer.description = $('p[itemprop="description"]').text();
    offer.phone = $($('.viewad-contact-phone')[0]).text();
    return cb(offer);
  });
};
// Turns a scraped figure such as "650", "2,5" or "60m²" into a number
// (decimal comma accepted); NaN when the text does not start with a number.
function parseScrapedNumber(value) {
  if (typeof value === 'number') {
    return value;
  }
  return parseFloat(String(value == null ? '' : value).replace(/\s/g, "").replace(',', '.'));
}

// True when no range is configured, or when the value parses to a number that
// is neither below `range.min` nor above `range.max` (an undefined bound is open).
function isWithinRange(value, range) {
  var amount;
  if (range == null) {
    return true;
  }
  amount = parseScrapedNumber(value);
  if (isNaN(amount)) {
    // Unreadable figures must not slip through: every comparison with NaN is
    // false, which previously let such offers pass all range filters.
    return false;
  }
  return !(amount < range.min || amount > range.max);
}

// True when `list` is present and contains `value` (strict equality).
function listContains(list, value) {
  return list != null && Array.prototype.indexOf.call(list, value) >= 0;
}

/**
 * Applies the configured filters to an enriched offer and emits `'match'`
 * with the offer when all of them pass.
 *
 * Filters, each skipped when not configured in `this.opts`:
 *  - location: `offer.district` is in `opts.district` OR `offer.postcode` is
 *    in `opts.postcode` (skipped only when neither list is configured);
 *  - `opts.netRent`, `opts.rooms`, `opts.size`: `{min, max}` ranges compared
 *    numerically against the scraped text;
 *  - `opts.regex`: must match `offer.description`.
 *
 * @param {Object} offer Offer produced by `_loadOffer` / `_extOffer`.
 * @returns {boolean|undefined} Result of `emit` on a match, otherwise `undefined`.
 */
EbkScraper.prototype._checkOffer = function(offer) {
  var opts = this.opts;
  var hasLocationFilter = opts.district != null || opts.postcode != null;
  var description;

  if (hasLocationFilter &&
      !(listContains(opts.district, offer.district) || listContains(opts.postcode, offer.postcode))) {
    return;
  }
  if (!isWithinRange(offer.netRent, opts.netRent)) {
    return;
  }
  if (!isWithinRange(offer.rooms, opts.rooms)) {
    return;
  }
  if (!isWithinRange(offer.size, opts.size)) {
    return;
  }
  if (opts.regex) {
    // A missing description is treated as empty text rather than crashing.
    description = offer.description == null ? '' : String(offer.description);
    if (!description.match(opts.regex)) {
      return;
    }
  }
  return this.emit('match', offer);
};
return EbkScraper;
})(require('events').EventEmitter);
module.exports = EbkScraper;
}).call(this);