-
Notifications
You must be signed in to change notification settings - Fork 0
/
readHistogram.js
49 lines (44 loc) · 1.77 KB
/
readHistogram.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
// This code reads only the Histogram files generated by eBird.
// That is useful for seeing which birds have already been seen, but it does
// not cover birds which might be seen, or birds which have been seen outside
// of the area covered by the Histogram file.
const fs = require('fs').promises
const removeEmptyLines = require("remove-blank-lines");
/**
 * Parse an eBird Histogram export (a tab-separated text file) into an object.
 *
 * After blank lines are removed, the first four lines are treated as the
 * header and every later line as a candidate species row whose first column
 * looks like `Common Name (<tag>Scientific name</tag>)`.
 *
 * @param {string} input - Path of the Histogram file to read.
 * @returns {Promise<{taxa: string, sampleSize: string[], species: Object[]}>}
 *   `taxa` is the second cell of the "Number of taxa" row, `sampleSize` holds
 *   the non-empty cells after the label of the "Sample Size" row, and
 *   `species` holds one single-key object per kept row, keyed by common name:
 *   `{ [name]: { species, 'Scientific Name', frequency } }`.
 * @throws {Error} If the "Number of taxa" or "Sample Size" row is missing.
 *   Errors raised while reading the file propagate unchanged.
 */
async function getData (input) {
  // Yes, this is brittle. I don't expect this format to change soon.
  const HEADER_LINES = 4
  // Basically, remove spuh: names containing any of these are left out.
  const paraspecial = ['sp.', '/']
  const speciesPattern = /(?<species>.*) \(.*>(?<latin>.*)</

  const lines = removeEmptyLines(await fs.readFile(input, 'utf8')).split('\n')

  // Cells of the first line containing `label`; fails loudly when it is absent.
  const headerCells = label => {
    const row = lines.find(line => line.includes(label))
    if (row === undefined) {
      throw new Error(`Histogram file ${input} has no "${label}" row`)
    }
    return row.split('\t')
  }

  const results = {}
  results.taxa = headerCells('Number of taxa')[1]
  results.sampleSize = headerCells('Sample Size').slice(1).filter(cell => cell !== '')

  const species = []
  for (const line of lines.slice(HEADER_LINES)) {
    const [nameCell, ...frequency] = line.split('\t')
    const match = nameCell.match(speciesPattern)
    // Rows that do not look like a species (for example the empty string left
    // behind by a trailing newline) are skipped instead of crashing the parse.
    if (match === null) continue
    const { species: speciesName, latin: scientificName } = match.groups
    if (paraspecial.some(marker => speciesName.includes(marker))) continue
    species.push({
      [speciesName]: {
        species: speciesName,
        'Scientific Name': scientificName,
        frequency
      }
    })
  }
  results.species = species
  return results
}
// TODO Is there a way to automatically generate these? As in, download them each morning from eBird?
/**
 * Load the Histogram stored at
 * `data/ebird_US-VT-023__2020_2020_1_12_barchart.txt`.
 *
 * @returns {Promise<Object>} Whatever `getData` resolves to for that file.
 */
async function washingtonCounty2020 () {
  const histogramPath = 'data/ebird_US-VT-023__2020_2020_1_12_barchart.txt'
  const histogram = await getData(histogramPath)
  return histogram
}
// The only name this module exports.
module.exports = { washingtonCounty2020: washingtonCounty2020 }