Skip to content

Commit

Permalink
Merge pull request #434 from inaturalist/cv-delegate-common-ancestor
Browse files Browse the repository at this point in the history
Delegate Common Ancestor
  • Loading branch information
pleary authored Mar 28, 2024
2 parents 03fec11 + 6f23909 commit 9fdfda1
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 206 deletions.
295 changes: 90 additions & 205 deletions lib/controllers/v1/computervision_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ const squel = require( "safe-squel" );
const md5 = require( "md5" );
const PromisePool = require( "es6-promise-pool" );
const pgClient = require( "../../pg_client" );
const redisClient = require( "../../redis_client" );
const ObservationsController = require( "./observations_controller" );
const TaxaController = require( "./taxa_controller" );
const InaturalistAPI = require( "../../inaturalist_api" );
const config = require( "../../../config" );
const util = require( "../../util" );
const ESModel = require( "../../models/es_model" );
const Taxon = require( "../../models/taxon" );
const Observation = require( "../../models/observation" );
const ObservationPreload = require( "../../models/observation_preload" );

// number of image results checked for common ancestor
const DEFAULT_ANCESTOR_WINDOW = 10;
Expand Down Expand Up @@ -80,13 +81,21 @@ const ComputervisionController = class ComputervisionController {
if ( !obsID ) {
throw util.httpError( 422, "Missing observation ID or UUID" );
}
const searchReq = { query: { id: obsID } };
// fetch the obs metadata
const response = await ObservationsController.search( searchReq );
if ( !response || _.isEmpty( response.results ) ) {
const obsObject = [{
observation_id: obsID
}];
const obsOpts = {
source: {
includes: ["id", "taxon", "location"]
}
};
await ESModel.fetchBelongsTo( obsObject, Observation, obsOpts );
const { observation } = obsObject[0];
if ( !observation ) {
throw util.httpError( 422, "Unknown observation" );
}
const observation = response.results[0];

await ObservationPreload.observationPhotos( [observation] );
let photoURL;
_.each( observation.photos, p => {
if ( photoURL ) { return; }
Expand All @@ -107,7 +116,7 @@ const ComputervisionController = class ComputervisionController {
if ( !req.userSession && !req.applicationSession ) {
throw util.httpError( 401, "Unauthorized" );
}
const photoURL = req.query.image_url;
const photoURL = req.query.image_url || req.body.image_url;
if ( !photoURL ) {
throw util.httpError( 422, "No scorable photo" );
}
Expand Down Expand Up @@ -142,6 +151,7 @@ const ComputervisionController = class ComputervisionController {
}
} );
if ( !scoreImageReq.body ) { scoreImageReq.body = { }; }
delete scoreImageReq.body.image_url;
scoreImageReq.body.lat = scoreImageReq.body.lat || req.query.lat;
scoreImageReq.body.lng = scoreImageReq.body.lng || req.query.lng;
scoreImageReq.body.radius = scoreImageReq.body.radius || req.query.radius;
Expand Down Expand Up @@ -170,6 +180,9 @@ const ComputervisionController = class ComputervisionController {
if ( !req.userSession && !req.applicationSession ) {
throw util.httpError( 401, "Unauthorized" );
}
if ( req.userSession?.isAdmin && req.body.image_url ) {
return ComputervisionController.scoreImageURL( req );
}
req.file = req.file || (
req.files
&& req.files.image
Expand All @@ -189,6 +202,13 @@ const ComputervisionController = class ComputervisionController {
knownLength: fs.statSync( uploadPath ).size
} );
formData.append( "geomodel", "true" );
if ( req.body.delegate_ca || req.query.delegate_ca ) {
formData.append( "aggregated", "true" );
formData.append( "format", "object" );
if ( req.body.taxon_id ) {
formData.append( "taxon_id", req.body.taxon_id );
}
}
if ( !req.body.skip_frequencies && req.body.lat && req.body.lng ) {
formData.append( "lat", req.body.lat );
formData.append( "lng", req.body.lng );
Expand All @@ -214,37 +234,7 @@ const ComputervisionController = class ComputervisionController {
} finally {
clearTimeout( requestTimeout );
}

const scores = _.map( json, score => ( {
taxon_id: Number( score.id ),
vision_score: score.vision_score,
combined_score: score.combined_score,
original_geo_score: score.geo_score,
original_combined_score: score.combined_score,
geo_threshold: score.geo_threshold
} ) );
const numericalCompareProperty = "combined_score";
const sumOfCombinedScores = _.sum( _.map( scores, "combined_score" ) );
if ( sumOfCombinedScores === 0 ) {
_.each( scores, s => { s.combined_score = s.vision_score; } );
}

// replace inactive taxa with their active counterparts, remove remaining inactive
const r = await TaxaController.replaceInactiveTaxa( scores, {
removeInactive: true,
numericalCompareProperty
} );
const { updatedObjects: activeTaxonScores, newTaxonIDs } = r;

// if there were taxa added to the counts to replace inactive taxa,
// their ancestries need to be cached for fast common ancestor lookups.
// Lookup only the taxa that haven't aleady been cached
await ComputervisionController.cacheAllTaxonAncestries( );
const allResultTaxonIDs = _.map( activeTaxonScores, "taxon_id" ).concat( newTaxonIDs );
const newIDsToCache = _.compact( _.filter( allResultTaxonIDs,
taxonID => _.isEmpty( TFServingTaxonAncestries[taxonID] ) ) );
await ComputervisionController.cacheTaxonAncestries( newIDsToCache );
return activeTaxonScores;
return json;
}

static async filterScoresByTaxon( req, scores ) {
Expand All @@ -258,6 +248,34 @@ const ComputervisionController = class ComputervisionController {
return scores;
}

static async delegatedScoresProcessing( req, visionApiResponse ) {
const localeOpts = util.localeOpts( req );
const prepareTaxon = t => {
t.prepareForResponse( localeOpts );
};
const taxonOpts = {
modifier: prepareTaxon,
source: { excludes: ["photos", "taxon_photos", "place_ids"] }
};

const response = {
common_ancestor: visionApiResponse.common_ancestor,
results: _.reverse( _.sortBy( visionApiResponse.results, "combined_score" ) )
};

const withTaxa = _.filter(
_.flattenDeep( [response.common_ancestor, response.results] )
);
_.each( withTaxa, s => {
s.taxon_id = s.id;
delete s.id;
s.frequency_score = s.original_geo_score >= s.geo_threshold ? 1 : 0;
} );

await ESModel.fetchBelongsTo( withTaxa, Taxon, taxonOpts );
return response;
}

static async combinedModelScoresProcessing( req, scores ) {
ComputervisionController.normalizeScores( scores, "combined_score" );

Expand Down Expand Up @@ -305,121 +323,43 @@ const ComputervisionController = class ComputervisionController {
static async scoreImageUpload( uploadPath, req ) {
InaturalistAPI.setPerPage( req, { default: 10, max: 100 } );
const imageScores = await ComputervisionController.scoreImagePath( uploadPath, req );
let scores = _.filter( imageScores, s => s.vision_score > 0 );
scores = await ComputervisionController.filterScoresByTaxon( req, scores );
if ( _.isEmpty( scores ) ) {
return InaturalistAPI.basicResponse( req );
if ( req.body.delegate_ca || req.query.delegate_ca ) {
return ComputervisionController.delegatedScoresProcessing( req, imageScores );
}
return ComputervisionController.combinedModelScoresProcessing( req, scores );
}

static async traditionalScoresProcessing( req, scores ) {
scores = _.sortBy( scores, "vision_score" ).reverse( );
ComputervisionController.normalizeScores( scores, "vision_score" );
const commonAncestor = await ComputervisionController.commonAncestor( req, scores );
let frequencyFunction;
// if the redis client is configured and the app is configured to,
// or the request asks for it, query Redis for frequency data
if ( !_.isEmpty( redisClient )
&& ( req.body.redis_frequencies === "true" || config.imageProcesing.frequencyBackend === "redis" ) ) {
frequencyFunction = ComputervisionController.nearbyTaxonFrequenciesRedis;
} else {
// otherwise query Elasticsearch for frequency data
frequencyFunction = ComputervisionController.nearbyTaxonFrequencies;
}
const nearbyTaxa = await frequencyFunction( req, scores, commonAncestor );
return ComputervisionController.scoreImageAfterFrequencies(
req, scores, nearbyTaxa, commonAncestor
);
}
const scores = _.map( imageScores, score => ( {
taxon_id: Number( score.id ),
vision_score: score.vision_score,
combined_score: score.combined_score,
original_geo_score: score.geo_score,
original_combined_score: score.combined_score,
geo_threshold: score.geo_threshold
} ) );
const numericalCompareProperty = "combined_score";

static async scoreImageAfterFrequencies( req, rawVisionScores, nearbyTaxa, commonAncestor ) {
// using _.has just checks to see results is an attribute of nearbyTaxa,
// it would still be true if there were no nearby results, in which case
// there would be no results at all
if ( nearbyTaxa && _.has( nearbyTaxa, "results" ) ) {
const ancestorNearbyTaxaResults = [];
const unrelatedNearbyTaxaResults = [];
const relatedNearbyTaxa = { };
const unrelatedNearbyTaxa = { };
const frequencyScores = { };
const visionScores = { };
const taxonScores = { };
let topScores;
// set frequencyScores and mark if nearby taxon is related to common ancestor
_.each( nearbyTaxa.results, r => {
if ( commonAncestor && r.taxon && r.taxon.ancestor_ids
&& r.taxon.ancestor_ids.includes( commonAncestor.taxon.id ) ) {
r.inCommonAncestor = true;
relatedNearbyTaxa[r.taxon.id] = true;
ancestorNearbyTaxaResults.push( r );
} else if ( r.taxon ) {
unrelatedNearbyTaxa[r.taxon.id] = true;
unrelatedNearbyTaxaResults.push( r );
}
frequencyScores[r.taxon.id] = r.count;
} );
// only boost vision:
// a) given a common ancestor, using nearby taxa in that ancestor
// b) there are no common ancestors, using all nearby taxa
const resultsToUse = commonAncestor ? ancestorNearbyTaxaResults : unrelatedNearbyTaxaResults;
const sumScoresResultsToUse = _.reduce( resultsToUse, ( sum, r ) => ( sum + r.count ), 0 );
_.each( resultsToUse, r => {
const score = ( r.count / sumScoresResultsToUse );
// default score for non-model nearby taxa is the frequency score
taxonScores[r.taxon.id] = score * 0.75;
} );
if ( !_.isEmpty( redisClient )
&& ( req.body.redis_frequencies === "true" || config.imageProcesing.frequencyBackend === "redis" ) ) {
_.each( rawVisionScores, r => {
visionScores[r.taxon_id] = r.vision_score;
if ( taxonScores[r.taxon_id] ) {
// vision results with relevant frequency scores get a boost
taxonScores[r.taxon_id] = r.vision_score
* ( 3 + ( ( taxonScores[r.taxon_id] / 0.75 ) * 6 ) );
} else {
// everything else uses the raw vision score
taxonScores[r.taxon_id] = r.vision_score;
}
} );
topScores = _.map( taxonScores, ( v, k ) => (
{
taxon_id: k,
combined_score: v,
frequency_score: ( frequencyScores[k] || 0 ),
vision_score: ( visionScores[k] || 0 )
}
) );
} else {
_.each( rawVisionScores, r => {
visionScores[r.taxon_id] = r.vision_score;
// the ( ... || 1 ) prevents dividing by 0
taxonScores[r.taxon_id] = taxonScores[r.taxon_id]
? taxonScores[r.taxon_id] * ( r.vision_score / 100 )
: ( r.vision_score / 100 )
* ( 1 / ( ( ancestorNearbyTaxaResults.length || 1 ) * 100 ) );
} );
topScores = _.map( taxonScores, ( v, k ) => {
const baseVisScore = visionScores[k] ? v : v * ( 1 / rawVisionScores.length );
return {
taxon_id: k,
combined_score: baseVisScore,
frequency_score: ( frequencyScores[k] || 0 ) * 100,
vision_score: ( visionScores[k] || 0 )
};
} );
}
topScores = _.sortBy( topScores, s => s.combined_score ).reverse( );
ComputervisionController.normalizeScores( topScores, "combined_score" );
return ComputervisionController.scoreImageResponse(
req, commonAncestor, topScores.slice( 0, req.query.per_page )
);
}
// there are no nearby taxa, so the combined_score is equal to the vision_score
const topScores = _.map( rawVisionScores, s => ( { combined_score: s.vision_score, ...s } ) );
return ComputervisionController.scoreImageResponse(
req, commonAncestor, topScores.slice( 0, req.query.per_page )
// replace inactive taxa with their active counterparts, remove remaining inactive
const r = await TaxaController.replaceInactiveTaxa( scores, {
removeInactive: true,
numericalCompareProperty
} );
const { updatedObjects: activeTaxonScores, newTaxonIDs } = r;

// if there were taxa added to the counts to replace inactive taxa,
// their ancestries need to be cached for fast common ancestor lookups.
// Lookup only the taxa that haven't aleady been cached
await ComputervisionController.cacheAllTaxonAncestries( );
const allResultTaxonIDs = _.map( activeTaxonScores, "taxon_id" ).concat( newTaxonIDs );
const newIDsToCache = _.compact( _.filter( allResultTaxonIDs,
taxonID => _.isEmpty( TFServingTaxonAncestries[taxonID] ) ) );
await ComputervisionController.cacheTaxonAncestries( newIDsToCache );

const filteredScores = await ComputervisionController.filterScoresByTaxon(
req, activeTaxonScores
);
if ( _.isEmpty( filteredScores ) ) {
return InaturalistAPI.basicResponse( req );
}
return ComputervisionController.combinedModelScoresProcessing( req, filteredScores );
}

static async scoreImageResponse( req, commonAncestor, topScores ) {
Expand Down Expand Up @@ -556,61 +496,6 @@ const ComputervisionController = class ComputervisionController {
return commonAncestor;
}

static async nearbyTaxonFrequenciesRedis( req, scores, commonAncestor ) {
if ( !scores || !req.body.lat || !req.body.lng || req.body.skip_frequencies === "true" ) {
return null;
}
const includeTaxonIDs = _.map( scores, "taxon_id" );
const nearbyReq = {
inat: { includeTaxonIDs },
query: {
lat: req.body.lat,
lng: req.body.lng,
observed_on: req.body.observed_on,
taxon_id: req.body.taxon_id
}
};
if ( commonAncestor ) {
nearbyReq.query.taxon_id = commonAncestor.taxon.id;
}
const { results } = await TaxaController.nearby( nearbyReq );
// normalize raw taxon counts into a ratio of all counts
ComputervisionController.normalizeScores( results );
return { results };
}

static async nearbyTaxonFrequencies( req, scores, commonAncestor ) {
if ( !scores || !req.body.lat || !req.body.lng || req.body.skip_frequencies === "true" ) {
return null;
}
let taxonIDs = [];
if ( commonAncestor ) {
taxonIDs.push( commonAncestor.taxon.id );
}
taxonIDs = taxonIDs.concat( _.map( scores, "taxon_id" ) );
const query = {
quality_grade: "research",
taxon_is_active: "true",
taxon_id: taxonIDs,
lat: req.body.lat,
lng: req.body.lng,
radius: req.body.radius || 100 // km
};
if ( req.body.observation_id ) {
query.not_id = req.body.observation_id;
}
if ( req.body.observed_on ) {
const parsedDate = util.parsedDateParam( req.body.observed_on );
if ( parsedDate && parsedDate.isValid( ) ) {
query.observed_after = parsedDate.subtract( req.body.window || 45, "days" )
.format( "YYYY-MM-DDTHH:mm:ss" );
query.observed_before = parsedDate.add( req.body.window || 45, "days" )
.format( "YYYY-MM-DDTHH:mm:ss" );
}
}
return ObservationsController.speciesCounts( { query } );
}

static normalizeScores( scores, key = "count" ) {
const multiplier = 100;
const sumScores = _.sum( _.map( scores, key ) );
Expand Down
3 changes: 2 additions & 1 deletion lib/controllers/v2/computervision_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ const { uuidsToSerialIds } = require( "../../util" );

const scoreObservation = async req => {
await uuidsToSerialIds( req, Observation );
return ctrlv1.scoreObservation( req );
const rsp = await ctrlv1.scoreObservation( req );
return rsp;
};

module.exports = {
Expand Down
1 change: 1 addition & 0 deletions lib/inaturalist_api_v2.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ const InaturalistAPIV2 = class InaturalistAPIV2 {
&& req.operationDoc
&& req.operationDoc.parameters
&& InaturalistAPIV2.isFetchRequest( req )
&& !req.operationDoc["x-allow-empty-results"]
) {
res.status( 404 );
}
Expand Down
Loading

0 comments on commit 9fdfda1

Please sign in to comment.