This repository has been archived by the owner on Mar 10, 2020. It is now read-only.

Commit 0054bf4
blurHY committed Apr 18, 2019
1 parent 401f948 commit 0054bf4
Showing 5 changed files with 20 additions and 19 deletions.
App.js (4 changes: 2 additions & 2 deletions)

@@ -58,7 +58,7 @@ async function waitAndGetAdmin() {
       followRedirect: false
     })
   } catch {
-    signale.info("Sent request to trigger ZeroHello downloading.")
+    signale.note("Sent request to trigger ZeroHello downloading.")
     await delay(process.env.mainLoopInterval)
   }
 } else
@@ -93,7 +93,7 @@ async function crawlASite(siteInfo) {
   isNewSite = false
 
   if (!siteObj) { // Site not found, create one
-    signale.santa(`Discovered a brand new site ${siteInfo.address}`)
+    signale.fav(`Discovered a brand new site ${siteInfo.address}`)
     siteObj = DataBase.genNewSite(siteInfo) // Init with siteInfo
     isNewSite = true
   } else if (new Date() - siteObj.runtimeInfo.lastCrawl.siteInfo > process.env.siteInfoUpdateInterval || 3600000) { // Update siteInfo
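The App.js changes demote routine progress messages from signale.info to signale.note and replace signale.santa with the stock signale.fav logger. A minimal sketch of the loggers involved (assuming the signale npm package; the address value is a placeholder):

const signale = require("signale")

const address = "1PLaceHoLderAddress" // hypothetical site address

signale.info("Sent request to trigger ZeroHello downloading.") // before: info badge
signale.note("Sent request to trigger ZeroHello downloading.") // after: quieter note badge
signale.fav(`Discovered a brand new site ${address}`)          // stock fav logger replaces santa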
Crawlers/DataBaseExplore.js (7 changes: 4 additions & 3 deletions)

@@ -20,9 +20,10 @@ async function pagingCrawl(siteDB, siteObj, table_name, start = 0, count = 3000)
   if (rows.length === 0)
     return
   await chillout.forEach(rows, async row => {
-    for (let field_name in row)
-      if (typeof row[field_name] === "string")
-        await linksExtractor.findLinksAndSave(row[field_name], siteObj._id, "site")
+    await chillout.forEach(row, async (val) => {
+      if (typeof val === "string")
+        await linksExtractor.findLinksAndSave(val, siteObj._id, "site")
+    })
   })
   await pagingCrawl(siteDB, siteObj, table_name, start + count, count)
 }
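The DataBaseExplore.js change replaces the blocking for...in scan of each row with a nested chillout.forEach, which also iterates plain objects and periodically yields to the event loop, so scanning wide rows no longer starves other work. A small sketch of the pattern, assuming the chillout npm package (findLinks is a hypothetical stand-in for linksExtractor.findLinksAndSave):

const chillout = require("chillout")

// Hypothetical stand-in for linksExtractor.findLinksAndSave
async function findLinks(text) {
  console.log("scanning:", text)
}

async function scanRow(row) {
  // chillout.forEach accepts plain objects as well as arrays and
  // yields to the event loop between iterations; a promise returned
  // from the callback is awaited before the next iteration
  await chillout.forEach(row, async val => {
    if (typeof val === "string")
      await findLinks(val)
  })
}

scanRow({ id: 1, title: "hello", body: "see zero://1PLaceHoLderAddress" })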
Crawlers/FeedFollow.js (20 changes: 8 additions & 12 deletions)

@@ -5,20 +5,18 @@ async function updateFeeds(dbSchema, siteDB, siteObj) {
   if (!(siteDB && dbSchema && dbSchema.feeds))
     return
 
-  signale.info(`Feeds available for ${siteObj.basicInfo.address}`)
+  signale.note(`Feeds available for ${siteObj.basicInfo.address}`)
 
-  let query, func = s => s,
-    now = new Date(),
+  let now = new Date(),
     lastDate = 0
 
   if (siteObj.runtimeInfo.lastCrawl.feeds && siteObj.runtimeInfo.lastCrawl.feeds.full > now - (process.env.FeedRecrawlInterval || 7200000)) { // Not too outdated
     if (!siteObj.runtimeInfo.lastCrawl.feeds.check || siteObj.runtimeInfo.lastCrawl.feeds.check < now - (process.env.FeedCheckInterval || 3600000)) { // New feeds only
-      func = s => `SELECT * FROM (${s}) where date_added > ${siteObj.runtimeInfo.lastCrawl.feeds.check.getTime() / 1000}` // Not needed to add the outer where clause to inner, because of the sqlite optimization
-      signale.info(`Check feeds for ${siteObj.basicInfo.address}`)
+      signale.note(`Check feeds for ${siteObj.basicInfo.address}`)
       lastDate = siteObj.runtimeInfo.lastCrawl.feeds.check
       siteObj.runtimeInfo.lastCrawl.feeds.check = now
     } else {
-      signale.info(`Stored feeds are up to date ${siteObj.basicInfo.address}`)
+      signale.note(`Stored feeds are up to date ${siteObj.basicInfo.address}`)
       return
     }
   } else {
@@ -28,18 +26,16 @@ async function updateFeeds(dbSchema, siteDB, siteObj) {
     siteObj.runtimeInfo.lastCrawl.feeds = {}
     signale.info(`Re-crawl all feeds for ${siteObj.basicInfo.address}`)
     if (!siteObj.runtimeInfo.lastCrawl.feeds || siteObj.runtimeInfo.lastCrawl.feeds.full === 0)
-      signale.info("lastCrawl.optional.full is 0")
+      signale.note("lastCrawl.optional.full is 0")
     await DataBase.feeds.deleteMany({ site: siteObj._id })
     siteObj.feedsQueried.splice(0) // Clear old data and re-query all feeds
     siteObj.runtimeInfo.lastCrawl.feeds.full = now
     siteObj.runtimeInfo.lastCrawl.feeds.check = now
   }
 
   for (let name in dbSchema.feeds)
-    if (name) {
-      query = func(dbSchema.feeds[name])
-      await pagingFeedQuery(query, siteDB, siteObj, name, 3000, 0, lastDate ? lastDate.getTime() / 1000 : 0)
-    }
+    if (name)
+      await pagingFeedQuery(dbSchema.feeds[name], siteDB, siteObj, name, 3000, 0, lastDate ? lastDate.getTime() / 1000 : 0)
 }
 
 async function pagingFeedQuery(query, siteDB, siteObj, name, count = 3000, start = 0, dateAfter = null) {
@@ -65,7 +61,7 @@ async function pagingFeedQuery(query, siteDB, siteObj, name, count = 3000, start
       await pagingFeedQuery(ori_query, siteDB, siteObj, name, count, start + count, dateAfter) // Query and store next page
     }
   } catch (e) {
-    signale.error(query, `An error appeared in query ${query}`)
+    signale.error(query, e)
   }
 }

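The FeedFollow.js refactor drops the func closure that wrapped each stored feed query in an outer SELECT ... WHERE date_added > cutoff; the cutoff now travels as the dateAfter argument of pagingFeedQuery (unix seconds, 0 meaning no filter), so the filtering lives in one place. For reference, a sketch of the wrapping the removed closure performed (SQLite syntax; the query text and cutoff value are illustrative):

// How the removed closure derived an incremental query from a stored
// feed query; dateAfter is unix seconds, 0 disables the filter
// (illustrative SQLite; table and column names are placeholders)
function withDateFilter(query, dateAfter) {
  return dateAfter
    ? `SELECT * FROM (${query}) WHERE date_added > ${dateAfter}`
    : query
}

const feedQuery = "SELECT title, body, date_added FROM post"
console.log(withDateFilter(feedQuery, 1555545600)) // new posts only
console.log(withDateFilter(feedQuery, 0))          // full re-crawl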
DataBase.js (6 changes: 5 additions & 1 deletion)

@@ -51,6 +51,7 @@ class DataBase extends EventEmitter {
       feedObjs.push(feedObj)
     }
     await this.feeds.insertMany(feedObjs)
+    signale.star(`DB operation: Added ${feedObjs.length} feeds`)
   }
   async addOptionalFiles(site, optionals) {
     if (optionals.length <= 0 || !site)
@@ -64,6 +65,7 @@ class DataBase extends EventEmitter {
       oObjs.push(oObj)
     }
     await this.opfiles.insertMany(oObjs)
+    signale.star(`DB operation: Added ${oObjs.length} optional files`)
   }
   genNewSite(siteInfo) { // Generate a site obj with siteInfo
     let site = {
@@ -120,8 +122,10 @@ class DataBase extends EventEmitter {
   }
   async addLinks(objs) {
     try {
-      if (objs.length > 0)
+      if (objs.length > 0) {
         await this.links.insertMany(objs, { ordered: false })
+        signale.star(`DB operation: Added ${objs.length} links`)
+      }
     } catch (e) {
       // BulkWriteError is expected
     }
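Note that in addLinks the new signale.star line logs the attempted batch size and is skipped entirely when insertMany throws: with { ordered: false } MongoDB attempts every document and raises a single BulkWriteError at the end, which the empty catch swallows. A sketch of recovering the real inserted count from that error (assuming the MongoDB Node driver's BulkWriteError shape, where e.result.nInserted is the success count; "links" is a collection with a unique index):

// Sketch: unordered insertMany keeps inserting past duplicate-key
// errors, and the BulkWriteError still reports how many succeeded
async function addLinks(links, objs) {
  if (objs.length === 0) return
  try {
    await links.insertMany(objs, { ordered: false })
    console.log(`DB operation: Added ${objs.length} links`)
  } catch (e) {
    // BulkWriteError is expected when some links already exist
    const n = e.result ? e.result.nInserted : 0
    console.log(`DB operation: Added ${n} of ${objs.length} links`)
  }
}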
ZeroNet/ZeroWs.js (2 changes: 1 addition & 1 deletion)

@@ -39,7 +39,7 @@ module.exports = class ZeroWs extends EventEmitter {
   }
 
   onClose(e) {
-    signale.warn("Connection to ZeroNet has been closed", e)
+    signale.fatal("Connection to ZeroNet has been closed", e)
     if (!this.reconnecting) {
       this.reconnecting = true
       setTimeout(() => {
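The ZeroWs.js change only raises the log severity of a closed connection from warn to fatal; the reconnect logic around it is untouched. For context, a sketch of the guard pattern visible in the diff, where a reconnecting flag keeps at most one reconnect timer pending (the delay and connect() body are illustrative, not the project's values):

// Sketch of the debounced-reconnect pattern from onClose
class Reconnector {
  constructor() {
    this.reconnecting = false
  }
  onClose(err) {
    console.error("Connection to ZeroNet has been closed", err)
    if (!this.reconnecting) {
      this.reconnecting = true // ignore further close events for now
      setTimeout(() => {
        this.reconnecting = false
        this.connect()
      }, 5000)
    }
  }
  connect() {
    console.log("reconnecting...")
  }
}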
