Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delegate Common Ancestor #434

Merged
merged 4 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
295 changes: 90 additions & 205 deletions lib/controllers/v1/computervision_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ const squel = require( "safe-squel" );
const md5 = require( "md5" );
const PromisePool = require( "es6-promise-pool" );
const pgClient = require( "../../pg_client" );
const redisClient = require( "../../redis_client" );
const ObservationsController = require( "./observations_controller" );
const TaxaController = require( "./taxa_controller" );
const InaturalistAPI = require( "../../inaturalist_api" );
const config = require( "../../../config" );
const util = require( "../../util" );
const ESModel = require( "../../models/es_model" );
const Taxon = require( "../../models/taxon" );
const Observation = require( "../../models/observation" );
const ObservationPreload = require( "../../models/observation_preload" );

// number of image results checked for common ancestor
const DEFAULT_ANCESTOR_WINDOW = 10;
Expand Down Expand Up @@ -80,13 +81,21 @@ const ComputervisionController = class ComputervisionController {
if ( !obsID ) {
throw util.httpError( 422, "Missing observation ID or UUID" );
}
const searchReq = { query: { id: obsID } };
// fetch the obs metadata
const response = await ObservationsController.search( searchReq );
if ( !response || _.isEmpty( response.results ) ) {
const obsObject = [{
observation_id: obsID
}];
const obsOpts = {
source: {
includes: ["id", "taxon", "location"]
}
};
await ESModel.fetchBelongsTo( obsObject, Observation, obsOpts );
const { observation } = obsObject[0];
if ( !observation ) {
throw util.httpError( 422, "Unknown observation" );
}
const observation = response.results[0];

await ObservationPreload.observationPhotos( [observation] );
let photoURL;
_.each( observation.photos, p => {
if ( photoURL ) { return; }
Expand All @@ -107,7 +116,7 @@ const ComputervisionController = class ComputervisionController {
if ( !req.userSession && !req.applicationSession ) {
throw util.httpError( 401, "Unauthorized" );
}
const photoURL = req.query.image_url;
const photoURL = req.query.image_url || req.body.image_url;
if ( !photoURL ) {
throw util.httpError( 422, "No scorable photo" );
}
Expand Down Expand Up @@ -142,6 +151,7 @@ const ComputervisionController = class ComputervisionController {
}
} );
if ( !scoreImageReq.body ) { scoreImageReq.body = { }; }
delete scoreImageReq.body.image_url;
scoreImageReq.body.lat = scoreImageReq.body.lat || req.query.lat;
scoreImageReq.body.lng = scoreImageReq.body.lng || req.query.lng;
scoreImageReq.body.radius = scoreImageReq.body.radius || req.query.radius;
Expand Down Expand Up @@ -170,6 +180,9 @@ const ComputervisionController = class ComputervisionController {
if ( !req.userSession && !req.applicationSession ) {
throw util.httpError( 401, "Unauthorized" );
}
if ( req.userSession?.isAdmin && req.body.image_url ) {
return ComputervisionController.scoreImageURL( req );
}
req.file = req.file || (
req.files
&& req.files.image
Expand All @@ -189,6 +202,13 @@ const ComputervisionController = class ComputervisionController {
knownLength: fs.statSync( uploadPath ).size
} );
formData.append( "geomodel", "true" );
if ( req.body.delegate_ca || req.query.delegate_ca ) {
formData.append( "aggregated", "true" );
formData.append( "format", "object" );
if ( req.body.taxon_id ) {
formData.append( "taxon_id", req.body.taxon_id );
}
}
if ( !req.body.skip_frequencies && req.body.lat && req.body.lng ) {
formData.append( "lat", req.body.lat );
formData.append( "lng", req.body.lng );
Expand All @@ -214,37 +234,7 @@ const ComputervisionController = class ComputervisionController {
} finally {
clearTimeout( requestTimeout );
}

const scores = _.map( json, score => ( {
taxon_id: Number( score.id ),
vision_score: score.vision_score,
combined_score: score.combined_score,
original_geo_score: score.geo_score,
original_combined_score: score.combined_score,
geo_threshold: score.geo_threshold
} ) );
const numericalCompareProperty = "combined_score";
const sumOfCombinedScores = _.sum( _.map( scores, "combined_score" ) );
if ( sumOfCombinedScores === 0 ) {
_.each( scores, s => { s.combined_score = s.vision_score; } );
}

// replace inactive taxa with their active counterparts, remove remaining inactive
const r = await TaxaController.replaceInactiveTaxa( scores, {
removeInactive: true,
numericalCompareProperty
} );
const { updatedObjects: activeTaxonScores, newTaxonIDs } = r;

// if there were taxa added to the counts to replace inactive taxa,
// their ancestries need to be cached for fast common ancestor lookups.
// Lookup only the taxa that haven't aleady been cached
await ComputervisionController.cacheAllTaxonAncestries( );
const allResultTaxonIDs = _.map( activeTaxonScores, "taxon_id" ).concat( newTaxonIDs );
const newIDsToCache = _.compact( _.filter( allResultTaxonIDs,
taxonID => _.isEmpty( TFServingTaxonAncestries[taxonID] ) ) );
await ComputervisionController.cacheTaxonAncestries( newIDsToCache );
return activeTaxonScores;
return json;
}

static async filterScoresByTaxon( req, scores ) {
Expand All @@ -258,6 +248,34 @@ const ComputervisionController = class ComputervisionController {
return scores;
}

static async delegatedScoresProcessing( req, visionApiResponse ) {
const localeOpts = util.localeOpts( req );
const prepareTaxon = t => {
t.prepareForResponse( localeOpts );
};
const taxonOpts = {
modifier: prepareTaxon,
source: { excludes: ["photos", "taxon_photos", "place_ids"] }
};

const response = {
common_ancestor: visionApiResponse.common_ancestor,
results: _.reverse( _.sortBy( visionApiResponse.results, "combined_score" ) )
};

const withTaxa = _.filter(
_.flattenDeep( [response.common_ancestor, response.results] )
);
_.each( withTaxa, s => {
s.taxon_id = s.id;
delete s.id;
s.frequency_score = s.original_geo_score >= s.geo_threshold ? 1 : 0;
} );

await ESModel.fetchBelongsTo( withTaxa, Taxon, taxonOpts );
return response;
}

static async combinedModelScoresProcessing( req, scores ) {
ComputervisionController.normalizeScores( scores, "combined_score" );

Expand Down Expand Up @@ -305,121 +323,43 @@ const ComputervisionController = class ComputervisionController {
static async scoreImageUpload( uploadPath, req ) {
InaturalistAPI.setPerPage( req, { default: 10, max: 100 } );
const imageScores = await ComputervisionController.scoreImagePath( uploadPath, req );
let scores = _.filter( imageScores, s => s.vision_score > 0 );
scores = await ComputervisionController.filterScoresByTaxon( req, scores );
if ( _.isEmpty( scores ) ) {
return InaturalistAPI.basicResponse( req );
if ( req.body.delegate_ca || req.query.delegate_ca ) {
return ComputervisionController.delegatedScoresProcessing( req, imageScores );
}
return ComputervisionController.combinedModelScoresProcessing( req, scores );
}

static async traditionalScoresProcessing( req, scores ) {
scores = _.sortBy( scores, "vision_score" ).reverse( );
ComputervisionController.normalizeScores( scores, "vision_score" );
const commonAncestor = await ComputervisionController.commonAncestor( req, scores );
let frequencyFunction;
// if the redis client is configured and the app is configured to,
// or the request asks for it, query Redis for frequency data
if ( !_.isEmpty( redisClient )
&& ( req.body.redis_frequencies === "true" || config.imageProcesing.frequencyBackend === "redis" ) ) {
frequencyFunction = ComputervisionController.nearbyTaxonFrequenciesRedis;
} else {
// otherwise query Elasticsearch for frequency data
frequencyFunction = ComputervisionController.nearbyTaxonFrequencies;
}
const nearbyTaxa = await frequencyFunction( req, scores, commonAncestor );
return ComputervisionController.scoreImageAfterFrequencies(
req, scores, nearbyTaxa, commonAncestor
);
}
const scores = _.map( imageScores, score => ( {
taxon_id: Number( score.id ),
vision_score: score.vision_score,
combined_score: score.combined_score,
original_geo_score: score.geo_score,
original_combined_score: score.combined_score,
geo_threshold: score.geo_threshold
} ) );
const numericalCompareProperty = "combined_score";

static async scoreImageAfterFrequencies( req, rawVisionScores, nearbyTaxa, commonAncestor ) {
// using _.has just checks to see results is an attribute of nearbyTaxa,
// it would still be true if there were no nearby results, in which case
// there would be no results at all
if ( nearbyTaxa && _.has( nearbyTaxa, "results" ) ) {
const ancestorNearbyTaxaResults = [];
const unrelatedNearbyTaxaResults = [];
const relatedNearbyTaxa = { };
const unrelatedNearbyTaxa = { };
const frequencyScores = { };
const visionScores = { };
const taxonScores = { };
let topScores;
// set frequencyScores and mark if nearby taxon is related to common ancestor
_.each( nearbyTaxa.results, r => {
if ( commonAncestor && r.taxon && r.taxon.ancestor_ids
&& r.taxon.ancestor_ids.includes( commonAncestor.taxon.id ) ) {
r.inCommonAncestor = true;
relatedNearbyTaxa[r.taxon.id] = true;
ancestorNearbyTaxaResults.push( r );
} else if ( r.taxon ) {
unrelatedNearbyTaxa[r.taxon.id] = true;
unrelatedNearbyTaxaResults.push( r );
}
frequencyScores[r.taxon.id] = r.count;
} );
// only boost vision:
// a) given a common ancestor, using nearby taxa in that ancestor
// b) there are no common ancestors, using all nearby taxa
const resultsToUse = commonAncestor ? ancestorNearbyTaxaResults : unrelatedNearbyTaxaResults;
const sumScoresResultsToUse = _.reduce( resultsToUse, ( sum, r ) => ( sum + r.count ), 0 );
_.each( resultsToUse, r => {
const score = ( r.count / sumScoresResultsToUse );
// default score for non-model nearby taxa is the frequency score
taxonScores[r.taxon.id] = score * 0.75;
} );
if ( !_.isEmpty( redisClient )
&& ( req.body.redis_frequencies === "true" || config.imageProcesing.frequencyBackend === "redis" ) ) {
_.each( rawVisionScores, r => {
visionScores[r.taxon_id] = r.vision_score;
if ( taxonScores[r.taxon_id] ) {
// vision results with relevant frequency scores get a boost
taxonScores[r.taxon_id] = r.vision_score
* ( 3 + ( ( taxonScores[r.taxon_id] / 0.75 ) * 6 ) );
} else {
// everything else uses the raw vision score
taxonScores[r.taxon_id] = r.vision_score;
}
} );
topScores = _.map( taxonScores, ( v, k ) => (
{
taxon_id: k,
combined_score: v,
frequency_score: ( frequencyScores[k] || 0 ),
vision_score: ( visionScores[k] || 0 )
}
) );
} else {
_.each( rawVisionScores, r => {
visionScores[r.taxon_id] = r.vision_score;
// the ( ... || 1 ) prevents dividing by 0
taxonScores[r.taxon_id] = taxonScores[r.taxon_id]
? taxonScores[r.taxon_id] * ( r.vision_score / 100 )
: ( r.vision_score / 100 )
* ( 1 / ( ( ancestorNearbyTaxaResults.length || 1 ) * 100 ) );
} );
topScores = _.map( taxonScores, ( v, k ) => {
const baseVisScore = visionScores[k] ? v : v * ( 1 / rawVisionScores.length );
return {
taxon_id: k,
combined_score: baseVisScore,
frequency_score: ( frequencyScores[k] || 0 ) * 100,
vision_score: ( visionScores[k] || 0 )
};
} );
}
topScores = _.sortBy( topScores, s => s.combined_score ).reverse( );
ComputervisionController.normalizeScores( topScores, "combined_score" );
return ComputervisionController.scoreImageResponse(
req, commonAncestor, topScores.slice( 0, req.query.per_page )
);
}
// there are no nearby taxa, so the combined_score is equal to the vision_score
const topScores = _.map( rawVisionScores, s => ( { combined_score: s.vision_score, ...s } ) );
return ComputervisionController.scoreImageResponse(
req, commonAncestor, topScores.slice( 0, req.query.per_page )
// replace inactive taxa with their active counterparts, remove remaining inactive
const r = await TaxaController.replaceInactiveTaxa( scores, {
removeInactive: true,
numericalCompareProperty
} );
const { updatedObjects: activeTaxonScores, newTaxonIDs } = r;

// if there were taxa added to the counts to replace inactive taxa,
// their ancestries need to be cached for fast common ancestor lookups.
// Lookup only the taxa that haven't aleady been cached
await ComputervisionController.cacheAllTaxonAncestries( );
const allResultTaxonIDs = _.map( activeTaxonScores, "taxon_id" ).concat( newTaxonIDs );
const newIDsToCache = _.compact( _.filter( allResultTaxonIDs,
taxonID => _.isEmpty( TFServingTaxonAncestries[taxonID] ) ) );
await ComputervisionController.cacheTaxonAncestries( newIDsToCache );

const filteredScores = await ComputervisionController.filterScoresByTaxon(
req, activeTaxonScores
);
if ( _.isEmpty( filteredScores ) ) {
return InaturalistAPI.basicResponse( req );
}
return ComputervisionController.combinedModelScoresProcessing( req, filteredScores );
}

static async scoreImageResponse( req, commonAncestor, topScores ) {
Expand Down Expand Up @@ -556,61 +496,6 @@ const ComputervisionController = class ComputervisionController {
return commonAncestor;
}

static async nearbyTaxonFrequenciesRedis( req, scores, commonAncestor ) {
if ( !scores || !req.body.lat || !req.body.lng || req.body.skip_frequencies === "true" ) {
return null;
}
const includeTaxonIDs = _.map( scores, "taxon_id" );
const nearbyReq = {
inat: { includeTaxonIDs },
query: {
lat: req.body.lat,
lng: req.body.lng,
observed_on: req.body.observed_on,
taxon_id: req.body.taxon_id
}
};
if ( commonAncestor ) {
nearbyReq.query.taxon_id = commonAncestor.taxon.id;
}
const { results } = await TaxaController.nearby( nearbyReq );
// normalize raw taxon counts into a ratio of all counts
ComputervisionController.normalizeScores( results );
return { results };
}

static async nearbyTaxonFrequencies( req, scores, commonAncestor ) {
if ( !scores || !req.body.lat || !req.body.lng || req.body.skip_frequencies === "true" ) {
return null;
}
let taxonIDs = [];
if ( commonAncestor ) {
taxonIDs.push( commonAncestor.taxon.id );
}
taxonIDs = taxonIDs.concat( _.map( scores, "taxon_id" ) );
const query = {
quality_grade: "research",
taxon_is_active: "true",
taxon_id: taxonIDs,
lat: req.body.lat,
lng: req.body.lng,
radius: req.body.radius || 100 // km
};
if ( req.body.observation_id ) {
query.not_id = req.body.observation_id;
}
if ( req.body.observed_on ) {
const parsedDate = util.parsedDateParam( req.body.observed_on );
if ( parsedDate && parsedDate.isValid( ) ) {
query.observed_after = parsedDate.subtract( req.body.window || 45, "days" )
.format( "YYYY-MM-DDTHH:mm:ss" );
query.observed_before = parsedDate.add( req.body.window || 45, "days" )
.format( "YYYY-MM-DDTHH:mm:ss" );
}
}
return ObservationsController.speciesCounts( { query } );
}

static normalizeScores( scores, key = "count" ) {
const multiplier = 100;
const sumScores = _.sum( _.map( scores, key ) );
Expand Down
3 changes: 2 additions & 1 deletion lib/controllers/v2/computervision_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ const { uuidsToSerialIds } = require( "../../util" );

const scoreObservation = async req => {
await uuidsToSerialIds( req, Observation );
return ctrlv1.scoreObservation( req );
const rsp = await ctrlv1.scoreObservation( req );
return rsp;
};

module.exports = {
Expand Down
1 change: 1 addition & 0 deletions lib/inaturalist_api_v2.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ const InaturalistAPIV2 = class InaturalistAPIV2 {
&& req.operationDoc
&& req.operationDoc.parameters
&& InaturalistAPIV2.isFetchRequest( req )
&& !req.operationDoc["x-allow-empty-results"]
) {
res.status( 404 );
}
Expand Down
Loading
Loading