Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more validation to Rite Aid API #547

Merged
28 changes: 28 additions & 0 deletions loader/src/__mocks__/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"use strict";
const { jest } = require("@jest/globals");
const originalModule = jest.requireActual("../utils");

const warningLoggers = {};
const warnings = [];

const mock = {
...originalModule,

createWarningLogger(prefix) {
warningLoggers[prefix] = jest.fn((message) => warnings.push(message));
return warningLoggers[prefix];
},

// Only for use in tests: get mock function for a logger.
__getWarningLogger(prefix) {
prefix = prefix || Object.keys(warningLoggers).pop();
return warningLoggers[prefix];
},

// Only for use in tests: get a list of all warnings that were logged.
__getWarnings() {
return warnings;
},
};

module.exports = mock;
26 changes: 25 additions & 1 deletion loader/src/schema-validation.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,32 @@ function assertSchema(schema, data, message) {
}
}

/**
* Update an "object" schema to require every property listed in the
* `properties` object. Returns the schema so you can just wrap a schema
* definition with it.
* @param {any} schema The schema to require all properties on.
* @returns {any}
*
* @example
* var mySchema = requireAllProperties({
* type: "object",
* properties: {
* a: { type: "number" },
* b: { type: "string" }
* }
* });
* // Throws an error:
* assertSchema(mySchema, { a: 5 });
*/
function requireAllProperties(schema) {
schema.required = Object.keys(schema.properties);
return schema;
}

module.exports = {
SchemaError,
getValidator,
assertSchema,
getValidator,
requireAllProperties,
};
177 changes: 154 additions & 23 deletions loader/src/sources/riteaid/api.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
const { DateTime } = require("luxon");
const Sentry = require("@sentry/node");
const geocoding = require("../../geocoding");
const { ParseError } = require("../../exceptions");
const { Available, LocationType } = require("../../model");
const { createWarningLogger, httpClient, RateLimit } = require("../../utils");
const {
createWarningLogger,
httpClient,
parseUsPhoneNumber,
RateLimit,
} = require("../../utils");
const {
assertSchema,
requireAllProperties,
} = require("../../schema-validation");
const { RiteAidApiError } = require("./common");

const warn = createWarningLogger("Rite Aid API");

// Log a warning if a location has more than this many slots in a given day.
const MAXIMUM_SLOT_COUNT = 500;

// States in which Rite Aid has stores.
const riteAidStates = new Set([
"CA",
Expand All @@ -28,6 +42,78 @@ const riteAidStates = new Set([
"WA",
]);

const riteAidWrapperSchema = requireAllProperties({
type: "object",
properties: {
Status: { type: "string" },
ErrCde: {},
ErrMsg: {},
ErrMsgDtl: {},
Data: requireAllProperties({
type: "object",
properties: {
providerDetails: { type: "array" },
},
additionalProperties: false,
}),
},
});

const riteAidLocationSchema = requireAllProperties({
type: "object",
properties: {
id: { type: "integer", minimum: 1 },
// parseUpdateTime() does fancy checking, so no need to check format here.
last_updated: { type: "string" },
name: { type: "string", pattern: "Rite Aid" },
location: requireAllProperties({
type: "object",
properties: {
resourceType: { type: "null" },
id: { type: "null" },
identifier: { type: "null" },
name: { type: "null" },
telecom: { type: "null" },
address: { type: "null" },
position: { type: "null" },
meta: { type: "null" },
description: { type: "null" },
street: { type: "string" },
street_line_2: { type: "string", nullable: true },
city: { type: "string" },
state: { type: "string", pattern: "[A-Z]{2}" },
zipcode: { type: "string", pattern: "\\d{1,5}(-\\d{4})?" },
county: { type: "null" },
identifiers: { type: "null" },
},
additionalProperties: false,
}),
contact: requireAllProperties({
type: "object",
properties: {
booking_phone: { type: "string", nullable: true },
booking_url: { type: "string", format: "uri" },
info_phone: { type: "string", nullable: true },
info_url: { type: "string", format: "uri" },
},
additionalProperties: false,
}),
availability: {
type: "array",
items: requireAllProperties({
type: "object",
properties: {
date: { type: "string", format: "date" },
total_slots: { type: "integer", minimum: 0 },
available_slots: { type: "integer", minimum: 0 },
},
additionalProperties: false,
}),
},
},
additionalProperties: false,
});

async function queryState(state, rateLimit = null) {
const RITE_AID_URL = process.env["RITE_AID_URL"];
const RITE_AID_KEY = process.env["RITE_AID_KEY"];
Expand All @@ -40,22 +126,24 @@ async function queryState(state, rateLimit = null) {

if (rateLimit) await rateLimit.ready();

const body = await httpClient({
const response = await httpClient({
url: RITE_AID_URL,
headers: { "Proxy-Authorization": "ldap " + RITE_AID_KEY },
searchParams: { stateCode: state },
}).json();

if (body.Status !== "SUCCESS") {
console.error(body.Status);
console.error(body.ErrCde);
console.error(body.ErrMsg);
console.error(body.ErrMsgDtl);
responseType: "json",
});

throw new Error("RiteAid API request failed");
if (response.body.Status !== "SUCCESS") {
throw new RiteAidApiError(response);
}

return body.Data.providerDetails.map(formatStore);
assertSchema(
riteAidWrapperSchema,
response.body,
"Response did not match schema"
);
Comment on lines +140 to +144
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made separate schemas for the API response wrapper and the actual objects because:

  1. It’s possible only some locations could get out of spec, so this lets us avoid breaking everything in that case. (OTOH, maybe we should break everything in that case? Also, not sure this scenario is at all likely to occur.)

  2. From a practical standpoint, the error message lists every error, and if there is a systemic change, the error message would be incredibly long because it repeats the same things hundreds or thousands of times (one for every location object) if there was just one schema instead of a separate one for the locations that we check when formatting on each location.

    Of course, that could be mitigated by limiting how many errors we include in the actual error message here:

    constructor(errors, data, message = null) {
    message = message || "Data did not match schema";
    const details = errors.map(SchemaError.formatAjvError);
    const detailMessage = details.map((detail) => ` ${detail}`).join("\n");
    super(`${message} - ${errors.length} errors:\n${detailMessage}`);


return response.body.Data.providerDetails;
}

/**
Expand Down Expand Up @@ -90,6 +178,12 @@ function parseUpdateTime(text) {
}

function formatStore(provider) {
assertSchema(
riteAidLocationSchema,
provider,
"API location did not match schema"
);

const address = formatAddress(provider.location);

let county = provider.location.county;
Expand Down Expand Up @@ -121,9 +215,13 @@ function formatStore(provider) {
state: provider.location.state,
postal_code: provider.location.zipcode,
county,
booking_phone: provider.contact.booking_phone,
booking_phone:
provider.contact.booking_phone &&
parseUsPhoneNumber(provider.contact.booking_phone),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Any reason not to have parseUsPhoneNumber handle the falsy check as well?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I was trying to match the behavior of parseUsAddress, which also does not support non-string input. It also seems a little easier to reason about if you can guarantee the return type will be a string.

But for this use case, it would definitely be simpler. Think I should update parseUsAddress, too? (I think the non-support for null/undefined in parseUsAddress is currently supporting some edge cases in NJVSS and Albertsons, so it may require some more changes.)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One approach that can solve for that is to allow the caller to provide a default fallback value, or else the function will throw an error given bad input (including wrong data type).

That said, parseUsAddress is complicated enough and used elsewhere enough that I don't know if it's worth messing with. I do appreciate that the two parseUsXYZ functions are consistent in whether or not you need to look before you leap.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, I think I'll keep it as-is for now, and maybe find ways to handle null for both of them later.

booking_url: provider.contact.booking_url,
info_phone: provider.contact.info_phone,
info_phone:
provider.contact.info_phone &&
parseUsPhoneNumber(provider.contact.info_phone),
info_url: provider.contact.info_url,

availability: {
Expand All @@ -144,12 +242,30 @@ function formatAvailable(provider) {
}

function formatCapacity(provider) {
return provider.availability.map((apiData) => ({
date: apiData.date,
available: apiData.available_slots > 0 ? Available.yes : Available.no,
available_count: apiData.available_slots,
unavailable_count: apiData.total_slots - apiData.available_slots,
}));
let maxDailySlots = 0;
const result = provider.availability.map((apiData) => {
maxDailySlots = Math.max(maxDailySlots, apiData.total_slots);
if (apiData.available_slots > apiData.total_slots) {
throw new Error("More available slots than total slots at a Rite Aid");
}

return {
date: apiData.date,
available: apiData.available_slots > 0 ? Available.yes : Available.no,
available_count: apiData.available_slots,
unavailable_count: apiData.total_slots - apiData.available_slots,
};
});

if (maxDailySlots > MAXIMUM_SLOT_COUNT) {
warn(
"Unrealistic slot count at a Rite Aid",
{ slots: maxDailySlots },
true
);
}

return result;
}

function formatAddress(location) {
Expand All @@ -174,7 +290,7 @@ async function checkAvailability(handler, options) {

if (!states.length) {
const statesText = Array.from(riteAidStates).join(", ");
console.warn(`No states set for riteAidApi (supported: ${statesText})`);
warn(`No states set for riteAidApi (supported: ${statesText})`);
}

if (options.rateLimit != null && isNaN(options.rateLimit)) {
Expand All @@ -185,11 +301,26 @@ async function checkAvailability(handler, options) {

let results = [];
for (const state of states) {
let stores;
// Sentry's withScope() doesn't work for async code, so we have to manually
// track the context data we want to add. :(
const errorContext = { state, source: "Rite Aid API" };

const stores = [];
try {
stores = await queryState(state, rateLimit);
const rawData = await queryState(state, rateLimit);
for (const rawLocation of rawData) {
Sentry.withScope((scope) => {
scope.setContext("context", errorContext);
scope.setContext("location", { id: rawLocation.id });
try {
stores.push(formatStore(rawLocation));
} catch (error) {
warn(error);
}
});
}
} catch (error) {
warn(error, { state, source: "Rite Aid API" }, true);
warn(error, errorContext, true);
Comment on lines +304 to +323
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is kind of ugly, but withScope() can’t handle async code. Definitely open to changes here if you have good thoughts on better patterns, or nice sugar.

Sentry has built-in support for managing async stuff with domains (for example, the express plugin uses them), but domains are a deprecated feature that are going away at some point in Node.js (I think in favor of Async Hooks). So I’m hesitant to build a fancy abstraction on them.

There are also some Sentry issues tracking better async support:

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I'm not sure it's worthwhile trying to build a generalized solve here, or at least not yet. The code as written reads fine to me and doesn't seem overly messy.

continue;
}

Expand Down
12 changes: 12 additions & 0 deletions loader/src/sources/riteaid/common.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
const assert = require("assert/strict");
const { HttpApiError } = require("../../exceptions");

class RiteAidApiError extends HttpApiError {
parse(response) {
assert.equal(typeof response.body, "object");
this.details = response.body;
this.message = `${this.details.Status} ${this.details.ErrCde}: ${this.details.ErrMsg}`;
}
}

module.exports = { RiteAidApiError };
14 changes: 3 additions & 11 deletions loader/src/sources/riteaid/scraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
const { mapKeys } = require("lodash");
const { DateTime } = require("luxon");
const Sentry = require("@sentry/node");
const { HttpApiError } = require("../../exceptions");
const { Available, LocationType, VaccineProduct } = require("../../model");
const { assertSchema } = require("../../schema-validation");
const {
Expand All @@ -20,6 +19,7 @@ const {
TIME_ZONE_OFFSET_STRINGS,
createWarningLogger,
} = require("../../utils");
const { RiteAidApiError } = require("./common");
const { zipCodesCovering100Miles } = require("./zip-codes");

// Load slot-level data in chunks of this many stores at a time.
Expand Down Expand Up @@ -50,13 +50,6 @@ const VACCINE_IDS_SHORT = mapKeys(VACCINE_IDS, (_, key) => key.split("-")[0]);

const warn = createWarningLogger("Rite Aid Scraper");

class RiteAidXhrError extends HttpApiError {
parse(response) {
super.parse(response);
this.message = `${this.details.Status} ${this.details.ErrCde}: ${this.details.ErrMsg}`;
}
}

async function queryZipCode(zip, radius = 100, stores = null) {
const response = await httpClient({
url: API_URL,
Expand All @@ -82,7 +75,7 @@ async function queryZipCode(zip, radius = 100, stores = null) {
});

if (response.body.Status !== "SUCCESS") {
throw new RiteAidXhrError(response);
throw new RiteAidApiError(response);
}

return response.body;
Expand Down Expand Up @@ -334,7 +327,7 @@ async function checkAvailability(handler, options) {
}

if (!states.length) {
console.warn(`No states set for riteAidApi`);
warn(`No states set for riteAidApi`);
}

if (options.rateLimit != null && isNaN(options.rateLimit)) {
Expand Down Expand Up @@ -366,7 +359,6 @@ module.exports = {
API_URL,
VACCINE_IDS,
VACCINE_IDS_SHORT,
RiteAidXhrError,
checkAvailability,
queryState,
queryZipCode,
Expand Down
Loading