Skip to content

Commit

Permalink
refactor(directory): move preprocess to directory service
Browse files Browse the repository at this point in the history
- remove rawdirectory search from urlrepository
- shift preprocess task to directorysearchservice
  • Loading branch information
Oxiang committed Dec 10, 2020
1 parent d247039 commit ff8be11
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 143 deletions.
142 changes: 8 additions & 134 deletions src/server/repositories/UrlRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,7 @@ import { QueryTypes } from 'sequelize'
import { Url, UrlType } from '../models/url'
import { NotFoundError } from '../util/error'
import { redirectClient } from '../redis'
import {
logger,
redirectExpiry,
searchDescriptionWeight,
searchShortUrlWeight,
} from '../config'
import { logger, redirectExpiry } from '../config'
import { sequelize } from '../util/sequelize'
import { DependencyIds } from '../constants'
import { FileVisibility, S3Interface } from '../services/aws'
Expand All @@ -23,10 +18,6 @@ import {
} from './types'
import { StorableUrlState } from './enums'
import { Mapper } from '../mappers/Mapper'
import { SearchResultsSortOrder } from '../../shared/search'
import { urlSearchVector } from '../models/search'
import { DirectoryQueryConditions } from '../services/interfaces/DirectorySearchServiceInterface'
import { sanitiseQuery } from '../util/sanitise'

const { Public, Private } = FileVisibility

Expand Down Expand Up @@ -141,54 +132,14 @@ export class UrlRepository implements UrlRepositoryInterface {
}
}

/**
 * Runs a directory search, delegating to the email-based lookup when
 * `conditions.isEmail` is truthy and to the text-based lookup otherwise.
 * @param {DirectoryQueryConditions} conditions The search query conditions.
 * @returns Promise of the paginated urls produced by the selected lookup.
 */
public rawDirectorySearch: (
  conditions: DirectoryQueryConditions,
) => Promise<UrlDirectoryPaginated> = async (conditions) => {
  const { query, order, limit, offset, state, isFile, isEmail } = conditions

  // The ORDER BY expression is shared by both lookups.
  const orderByClause = this.getRankingAlgorithm(order, Url.tableName)

  if (isEmail) {
    const emailMatches = await this.getRelevantUrlsFromEmail(
      query,
      orderByClause,
      limit,
      offset,
      state,
      isFile,
    )
    return emailMatches
  }

  const textMatches = await this.getRelevantUrlsFromText(
    urlSearchVector,
    orderByClause,
    limit,
    offset,
    query,
    state,
    isFile,
  )
  return textMatches
}

private async getRelevantUrlsFromEmail(
query: string,
public async getRelevantUrlsFromEmail(
likeQuery: string[],
rankingAlgorithm: string,
limit: number,
offset: number,
state: string | undefined,
isFile: boolean | undefined,
queryState: string[],
queryFile: boolean[],
): Promise<UrlDirectoryPaginated> {
const emails = query.toString().split(' ')
// split email/domains by space into tokens, also reduces injections
const likeQuery = emails.map(sanitiseQuery)

const queryFile = this.getQueryFileEmail(isFile)
const queryState = this.getQueryStateEmail(state)

// TODO: optimize the search query, possibly with reverse-email search
const rawQuery = `
SELECT "users"."email", "urls"."shortUrl", "urls"."state", "urls"."isFile"
Expand Down Expand Up @@ -220,17 +171,15 @@ export class UrlRepository implements UrlRepositoryInterface {
return { count, urls: slicedUrlsModel }
}

private async getRelevantUrlsFromText(
public async getRelevantUrlsFromText(
urlVector: string,
rankingAlgorithm: string,
limit: number,
offset: number,
query: string,
state: string | undefined,
isFile: boolean | undefined,
queryState: string,
queryFile: string,
): Promise<UrlDirectoryPaginated> {
const queryFile = this.getQueryFileText(isFile)
const queryState = this.getQueryStateText(state)
const rawQuery = `
SELECT "urls"."shortUrl", "users"."email", "urls"."state", "urls"."isFile"
FROM urls AS "urls"
Expand Down Expand Up @@ -260,46 +209,6 @@ export class UrlRepository implements UrlRepositoryInterface {
return { count, urls: slicedUrlsModel }
}

/**
 * Expands the optional file filter into the list of `isFile` values that the
 * email search should accept.
 * @param {boolean | undefined} isFile `true` or `false` to restrict the search, anything else for no restriction.
 * @returns `[true]`, `[false]`, or `[true, false]` when no restriction applies.
 */
private getQueryFileEmail: (isFile: boolean | undefined) => Array<boolean> = (
  isFile,
) => {
  // Only an explicit boolean narrows the result set.
  return typeof isFile === 'boolean' ? [isFile] : [true, false]
}

/**
 * Expands the optional state filter into the list of url states that the
 * email search should accept.
 * @param {string | undefined} state `'ACTIVE'` or `'INACTIVE'` to restrict the search, anything else for no restriction.
 * @returns The single requested state, or both states when no restriction applies.
 */
private getQueryStateEmail: (state: string | undefined) => Array<string> = (
  state,
) => {
  switch (state) {
    case 'ACTIVE':
      return ['ACTIVE']
    case 'INACTIVE':
      return ['INACTIVE']
    default:
      // Unrecognised or missing state: match both.
      return ['ACTIVE', 'INACTIVE']
  }
}

/**
 * Builds the SQL condition that restricts the text search by `isFile`.
 * @param {boolean | undefined} isFile `true` or `false` to restrict the search, anything else for no restriction.
 * @returns An `AND ...` condition, or an empty string when no restriction applies.
 */
private getQueryFileText: (isFile: boolean | undefined) => string = (
  isFile,
) => {
  // Anything other than an explicit boolean adds no condition.
  if (typeof isFile !== 'boolean') return ''

  return isFile ? `AND urls."isFile"=true` : `AND urls."isFile"=false`
}

/**
 * Builds the SQL condition that restricts the text search by url state.
 * @param {string | undefined} state `'ACTIVE'` or `'INACTIVE'` to restrict the search, anything else for no restriction.
 * @returns An `AND ...` condition, or an empty string when no restriction applies.
 */
private getQueryStateText: (state: string | undefined) => string = (
  state,
) => {
  switch (state) {
    case 'ACTIVE':
      return `AND urls.state = 'ACTIVE'`
    case 'INACTIVE':
      return `AND urls.state = 'INACTIVE'`
    default:
      // Unrecognised or missing state: no extra condition.
      return ''
  }
}

/**
* Invalidates the redirect entry on the cache for the input
* short url.
Expand Down Expand Up @@ -379,41 +288,6 @@ export class UrlRepository implements UrlRepositoryInterface {
})
})
}

/**
 * Produces the expression placed in the ORDER BY clause of the search SQL
 * statement for the requested sort order.
 * @param {SearchResultsSortOrder} order The requested sort order.
 * @param {string} tableName Name of the table whose columns are referenced.
 * @returns The clause as a string.
 * @throws {Error} If the sort order is not supported.
 */
private getRankingAlgorithm(
  order: SearchResultsSortOrder,
  tableName: string,
): string {
  switch (order) {
    case SearchResultsSortOrder.Relevance: {
      // ts_rank_cd's final argument is the normalization bit mask, which
      // controls whether and how document length affects the rank. The
      // value 1 divides the rank by 1 + the logarithm of the document length.
      const relevanceScore = `ts_rank_cd('{0, 0, ${searchDescriptionWeight}, ${searchShortUrlWeight}}',${urlSearchVector}, query, 1)`
      return `${relevanceScore} * log(${tableName}.clicks + 1)`
    }
    case SearchResultsSortOrder.Recency:
      return `${tableName}."createdAt"`
    case SearchResultsSortOrder.Popularity:
      return `${tableName}.clicks`
    default:
      throw new Error(`Unsupported SearchResultsSortOrder: ${order}`)
  }
}
}

export default UrlRepository
42 changes: 36 additions & 6 deletions src/server/repositories/interfaces/UrlRepositoryInterface.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { StorableFile, StorableUrl, UrlDirectoryPaginated } from '../types'
import { DirectoryQueryConditions } from '../../services/interfaces/DirectorySearchServiceInterface'

/**
* A url repository that handles access to the data store of Urls.
Expand Down Expand Up @@ -42,11 +41,42 @@ export interface UrlRepositoryInterface {
getLongUrl: (shortUrl: string) => Promise<string>

/**
* Performs search for email and plain text search.
* @param {DirectoryQueryConditions} conditions The search query conditions.
* @returns Promise of total no. Of search results and the results on the current page.
* Search results based on email domains.
* @param {string[]} likeQuery List of valid email and domains for SQL query.
* @param {string} rankingAlgorithm Sort order.
* @param {number} limit Number of results returned.
* @param {number} offset Number of results skipped.
* @param {string[]} queryState List of states to retrieve for SQL query.
* @param {boolean[]} queryFile List of url types to retrieve for SQL query.
* @returns Promise that returns list of longUrl and count.
*/
rawDirectorySearch: (
conditions: DirectoryQueryConditions,
getRelevantUrlsFromEmail: (
likeQuery: string[],
rankingAlgorithm: string,
limit: number,
offset: number,
queryState: string[],
queryFile: boolean[],
) => Promise<UrlDirectoryPaginated>

/**
* Retrieves urls relevant to a plain-text search query.
* @param {string} urlVector Vectorised search expression.
* @param {string} rankingAlgorithm SQL expression used to order the results.
* @param {number} limit Number of results returned.
* @param {number} offset Number of results skipped.
* @param {string} query Search query to be vectorised.
* @param {string} queryState SQL condition restricting the url state; an empty string applies no restriction.
* @param {string} queryFile SQL condition restricting whether the url is a file; an empty string applies no restriction.
* @returns Promise of the total number of search results and the results on the current page.
*/
getRelevantUrlsFromText: (
urlVector: string,
rankingAlgorithm: string,
limit: number,
offset: number,
query: string,
queryState: string,
queryFile: string,
) => Promise<UrlDirectoryPaginated>
}
113 changes: 110 additions & 3 deletions src/server/services/DirectorySearchService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ import { UrlRepositoryInterface } from '../repositories/interfaces/UrlRepository
import { DependencyIds } from '../constants'
import { UrlDirectoryPaginated } from '../repositories/types'
import { DirectoryQueryConditions } from './interfaces/DirectorySearchServiceInterface'
import { sanitiseQuery } from '../util/sanitise'
import { Url } from '../models/url'
import { SearchResultsSortOrder } from '../../shared/search'
import { searchDescriptionWeight, searchShortUrlWeight } from '../config'
import { urlSearchVector } from '../models/search'

@injectable()
export class DirectorySearchService {
Expand All @@ -14,12 +19,114 @@ export class DirectorySearchService {
this.urlRepository = urlRepository
}

/**
 * Expands the optional file filter into the list of `isFile` values that the
 * email search should accept.
 * @param {boolean | undefined} isFile `true` or `false` to restrict the search, anything else for no restriction.
 * @returns `[true]`, `[false]`, or `[true, false]` when no restriction applies.
 */
private getQueryFileEmail: (isFile: boolean | undefined) => Array<boolean> = (
  isFile,
) => {
  // Only an explicit boolean narrows the result set.
  return typeof isFile === 'boolean' ? [isFile] : [true, false]
}

/**
 * Expands the optional state filter into the list of url states that the
 * email search should accept.
 * @param {string | undefined} state `'ACTIVE'` or `'INACTIVE'` to restrict the search, anything else for no restriction.
 * @returns The single requested state, or both states when no restriction applies.
 */
private getQueryStateEmail: (state: string | undefined) => Array<string> = (
  state,
) => {
  switch (state) {
    case 'ACTIVE':
      return ['ACTIVE']
    case 'INACTIVE':
      return ['INACTIVE']
    default:
      // Unrecognised or missing state: match both.
      return ['ACTIVE', 'INACTIVE']
  }
}

/**
 * Builds the SQL condition that restricts the text search by `isFile`.
 * @param {boolean | undefined} isFile `true` or `false` to restrict the search, anything else for no restriction.
 * @returns An `AND ...` condition, or an empty string when no restriction applies.
 */
private getQueryFileText: (isFile: boolean | undefined) => string = (
  isFile,
) => {
  // Anything other than an explicit boolean adds no condition.
  if (typeof isFile !== 'boolean') return ''

  return isFile ? `AND urls."isFile"=true` : `AND urls."isFile"=false`
}

/**
 * Builds the SQL condition that restricts the text search by url state.
 * @param {string | undefined} state `'ACTIVE'` or `'INACTIVE'` to restrict the search, anything else for no restriction.
 * @returns An `AND ...` condition, or an empty string when no restriction applies.
 */
private getQueryStateText: (state: string | undefined) => string = (
  state,
) => {
  switch (state) {
    case 'ACTIVE':
      return `AND urls.state = 'ACTIVE'`
    case 'INACTIVE':
      return `AND urls.state = 'INACTIVE'`
    default:
      // Unrecognised or missing state: no extra condition.
      return ''
  }
}

/**
 * Produces the expression placed in the ORDER BY clause of the search SQL
 * statement for the requested sort order.
 * @param {SearchResultsSortOrder} order The requested sort order.
 * @param {string} tableName Name of the table whose columns are referenced.
 * @returns The clause as a string.
 * @throws {Error} If the sort order is not supported.
 */
private getRankingAlgorithm: (
  order: SearchResultsSortOrder,
  tableName: string,
) => string = (order, tableName) => {
  switch (order) {
    case SearchResultsSortOrder.Relevance: {
      // ts_rank_cd's final argument is the normalization bit mask, which
      // controls whether and how document length affects the rank. The
      // value 1 divides the rank by 1 + the logarithm of the document length.
      const relevanceScore = `ts_rank_cd('{0, 0, ${searchDescriptionWeight}, ${searchShortUrlWeight}}',${urlSearchVector}, query, 1)`
      return `${relevanceScore} * log(${tableName}.clicks + 1)`
    }
    case SearchResultsSortOrder.Recency:
      return `${tableName}."createdAt"`
    case SearchResultsSortOrder.Popularity:
      return `${tableName}.clicks`
    default:
      throw new Error(`Unsupported SearchResultsSortOrder: ${order}`)
  }
}

/**
 * Searches the directory by email/domain when `conditions.isEmail` is truthy,
 * and by plain text otherwise. The search conditions are first translated
 * into the arguments expected by the url repository.
 *
 * The returned promise rejects if `conditions.order` is not a supported
 * sort order.
 * @param {DirectoryQueryConditions} conditions The search query conditions.
 * @returns Promise of the total number of search results and the results on the current page.
 */
public plainTextSearch: (
  conditions: DirectoryQueryConditions,
) => Promise<UrlDirectoryPaginated> = async (conditions) => {
  const { tableName } = Url
  const { isEmail, query, isFile, state, order, limit, offset } = conditions
  const rankingAlgorithm = this.getRankingAlgorithm(order, tableName)

  if (isEmail) {
    // Split emails/domains by space into tokens and sanitise each token
    // before it is used in the SQL query.
    const emails = query.toString().split(' ')
    const likeQuery = emails.map(sanitiseQuery)
    const queryFile = this.getQueryFileEmail(isFile)
    const queryState = this.getQueryStateEmail(state)

    return this.urlRepository.getRelevantUrlsFromEmail(
      likeQuery,
      rankingAlgorithm,
      limit,
      offset,
      queryState,
      queryFile,
    )
  }

  const urlVector = urlSearchVector
  const queryFile = this.getQueryFileText(isFile)
  const queryState = this.getQueryStateText(state)

  return this.urlRepository.getRelevantUrlsFromText(
    urlVector,
    rankingAlgorithm,
    limit,
    offset,
    query,
    queryState,
    queryFile,
  )
}
}
Expand Down

0 comments on commit ff8be11

Please sign in to comment.