Skip to content

Commit

Permalink
refactor(directory): revert back to initial directory pipeline
Browse files Browse the repository at this point in the history
- move preprocess back to urlrepository
- add back rawdirectorysearch
- revert back tests
  • Loading branch information
Oxiang committed Dec 10, 2020
1 parent 392724f commit c9ec551
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 273 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ describe('UrlManagementService', () => {
findByShortUrl: jest.fn(),
getLongUrl: jest.fn(),
plainTextSearch: jest.fn(),
getRelevantUrlsFromEmail: jest.fn(),
getRelevantUrlsFromText: jest.fn(),
rawDirectorySearch: jest.fn(),
}

const service = new UrlManagementService(userRepository, urlRepository)
Expand Down
148 changes: 138 additions & 10 deletions src/server/repositories/UrlRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ import { QueryTypes } from 'sequelize'
import { Url, UrlType } from '../models/url'
import { NotFoundError } from '../util/error'
import { redirectClient } from '../redis'
import { logger, redirectExpiry } from '../config'
import {
logger,
redirectExpiry,
searchDescriptionWeight,
searchShortUrlWeight,
} from '../config'
import { sequelize } from '../util/sequelize'
import { DependencyIds } from '../constants'
import { FileVisibility, S3Interface } from '../services/aws'
Expand All @@ -18,6 +23,10 @@ import {
} from './types'
import { StorableUrlState } from './enums'
import { Mapper } from '../mappers/Mapper'
import { SearchResultsSortOrder } from '../../shared/search'
import { urlSearchVector } from '../models/search'
import { DirectoryQueryConditions } from '../services/interfaces/DirectorySearchServiceInterface'
import { extractShortUrl, sanitiseQuery } from '../util/parse'

const { Public, Private } = FileVisibility

Expand Down Expand Up @@ -132,14 +141,54 @@ export class UrlRepository implements UrlRepositoryInterface {
}
}

public async getRelevantUrlsFromEmail(
likeQuery: string[],
/**
 * Performs a directory search, delegating to the email-based lookup when
 * `conditions.isEmail` is truthy and to the plain-text lookup otherwise.
 * The ORDER BY expression is derived from `conditions.order` before either
 * lookup runs, so an unsupported sort order rejects without querying.
 * @param {DirectoryQueryConditions} conditions The search query conditions.
 * @returns Promise resolving to the result of the selected lookup.
 */
public rawDirectorySearch: (
  conditions: DirectoryQueryConditions,
) => Promise<UrlDirectoryPaginated> = async (conditions) => {
  const { query, order, limit, offset, state, isFile, isEmail } = conditions

  // Computed up-front for both branches; this may throw on an unknown order.
  const orderByClause = this.getRankingAlgorithm(order, Url.tableName)

  if (isEmail) {
    return await this.getRelevantUrlsFromEmail(
      query,
      orderByClause,
      limit,
      offset,
      state,
      isFile,
    )
  }

  return await this.getRelevantUrlsFromText(
    urlSearchVector,
    orderByClause,
    limit,
    offset,
    query,
    state,
    isFile,
  )
}

private async getRelevantUrlsFromEmail(
query: string,
rankingAlgorithm: string,
limit: number,
offset: number,
queryState: string[],
queryFile: boolean[],
state: string | undefined,
isFile: boolean | undefined,
): Promise<UrlDirectoryPaginated> {
const emails = query.toString().split(' ')
// split email/domains by space into tokens, also reduces injections
const likeQuery = emails.map(sanitiseQuery)

const queryFile = this.getQueryFileEmail(isFile)
const queryState = this.getQueryStateEmail(state)

// TODO: optimize the search query, possibly with reverse-email search
const rawQuery = `
SELECT "users"."email", "urls"."shortUrl", "urls"."state", "urls"."isFile"
Expand Down Expand Up @@ -171,21 +220,25 @@ export class UrlRepository implements UrlRepositoryInterface {
return { count, urls: slicedUrlsModel }
}

public async getRelevantUrlsFromText(
private async getRelevantUrlsFromText(
urlVector: string,
rankingAlgorithm: string,
limit: number,
offset: number,
query: string,
queryState: string,
queryFile: string,
state: string | undefined,
isFile: boolean | undefined,
): Promise<UrlDirectoryPaginated> {
// Extract shortUrls with regex
const newQuery = extractShortUrl(query)
const queryFile = this.getQueryFileText(isFile)
const queryState = this.getQueryStateText(state)
const rawQuery = `
SELECT "urls"."shortUrl", "users"."email", "urls"."state", "urls"."isFile"
FROM urls AS "urls"
JOIN users
ON "urls"."userId" = "users"."id"
JOIN plainto_tsquery('english', $query) query
JOIN plainto_tsquery('english', $newQuery) query
ON query @@ (${urlVector})
${queryFile}
${queryState}
Expand All @@ -194,7 +247,7 @@ export class UrlRepository implements UrlRepositoryInterface {
// Search only once to get both urls and count
const urlsModel = (await sequelize.query(rawQuery, {
bind: {
query,
newQuery,
},
raw: true,
type: QueryTypes.SELECT,
Expand All @@ -209,6 +262,46 @@ export class UrlRepository implements UrlRepositoryInterface {
return { count, urls: slicedUrlsModel }
}

/**
 * Builds the list of `isFile` values the email search should match.
 * @param {boolean | undefined} isFile Optional file-type filter.
 * @returns `[true]` or `[false]` when the filter is an explicit boolean,
 * otherwise both values (no filtering).
 */
private getQueryFileEmail: (isFile: boolean | undefined) => Array<boolean> = (
  isFile,
) => {
  // Only an explicit boolean narrows the search; anything else matches both.
  if (typeof isFile === 'boolean') {
    return [isFile]
  }
  return [true, false]
}

/**
 * Builds the list of url states the email search should match.
 * @param {string | undefined} state Optional state filter.
 * @returns A single-element list when `state` is exactly 'ACTIVE' or
 * 'INACTIVE', otherwise both states (no filtering).
 */
private getQueryStateEmail: (state: string | undefined) => Array<string> = (
  state,
) => {
  switch (state) {
    case 'ACTIVE':
      return ['ACTIVE']
    case 'INACTIVE':
      return ['INACTIVE']
    default:
      // Unknown or missing state: do not restrict the search.
      return ['ACTIVE', 'INACTIVE']
  }
}

/**
 * Builds the SQL fragment that restricts the text search by file type.
 * @param {boolean | undefined} isFile Optional file-type filter.
 * @returns An `AND urls."isFile"=...` clause for an explicit boolean,
 * or an empty string when no filter applies.
 */
private getQueryFileText: (isFile: boolean | undefined) => string = (
  isFile,
) => {
  switch (isFile) {
    case true:
      return `AND urls."isFile"=true`
    case false:
      return `AND urls."isFile"=false`
    default:
      // No filter requested: contribute nothing to the query.
      return ''
  }
}

/**
 * Builds the SQL fragment that restricts the text search by url state.
 * Only the two known literals produce a clause, so the caller-supplied
 * value is never interpolated into the SQL.
 * @param {string | undefined} state Optional state filter.
 * @returns An `AND urls.state = ...` clause for 'ACTIVE' or 'INACTIVE',
 * or an empty string when no filter applies.
 */
private getQueryStateText: (state: string | undefined) => string = (
  state,
) => {
  switch (state) {
    case 'ACTIVE':
      return `AND urls.state = 'ACTIVE'`
    case 'INACTIVE':
      return `AND urls.state = 'INACTIVE'`
    default:
      return ''
  }
}

/**
* Invalidates the redirect entry on the cache for the input
* short url.
Expand Down Expand Up @@ -288,6 +381,41 @@ export class UrlRepository implements UrlRepositoryInterface {
})
})
}

/**
 * Produces the expression placed in the ORDER BY clause of the search
 * SQL statement for the requested sort order.
 * @param {SearchResultsSortOrder} order The requested sort order.
 * @param {string} tableName Name of the table whose columns are referenced.
 * @returns The ORDER BY expression as a string.
 * @throws {Error} If `order` is not one of the handled sort orders.
 */
private getRankingAlgorithm(
  order: SearchResultsSortOrder,
  tableName: string,
): string {
  if (order === SearchResultsSortOrder.Relevance) {
    // The final argument to ts_rank_cd is the normalization option, a bit
    // mask controlling whether and how document length affects the rank.
    // 1 divides the rank by 1 + the logarithm of the document length.
    const textRanking = `ts_rank_cd('{0, 0, ${searchDescriptionWeight}, ${searchShortUrlWeight}}',${urlSearchVector}, query, 1)`
    // Text rank is scaled by the logarithm of (clicks + 1).
    return `${textRanking} * log(${tableName}.clicks + 1)`
  }

  if (order === SearchResultsSortOrder.Recency) {
    return `${tableName}."createdAt"`
  }

  if (order === SearchResultsSortOrder.Popularity) {
    return `${tableName}.clicks`
  }

  throw new Error(`Unsupported SearchResultsSortOrder: ${order}`)
}
}

export default UrlRepository
42 changes: 6 additions & 36 deletions src/server/repositories/interfaces/UrlRepositoryInterface.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { StorableFile, StorableUrl, UrlDirectoryPaginated } from '../types'
import { DirectoryQueryConditions } from '../../services/interfaces/DirectorySearchServiceInterface'

/**
* A url repository that handles access to the data store of Urls.
Expand Down Expand Up @@ -41,42 +42,11 @@ export interface UrlRepositoryInterface {
getLongUrl: (shortUrl: string) => Promise<string>

/**
* Search results base on email domains.
* @param {string[]} likeQuery List of valid email and domains for SQL query.
* @param {string} rankingAlgorithm Sort order.
* @param {number} limit Number of results returned.
* @param {number} offset Number of results skipped.
* @param {string[]} queryState List of states to retrieve for SQL query.
* @param {boolean[]} queryFile List of url types to retrieve for SQL query.
* @returns Promise that returns list of longUrl and count.
* Performs search for email and plain text search.
* @param {DirectoryQueryConditions} conditions The search query conditions.
* @returns Promise of total no. of search results and the results on the current page.
*/
getRelevantUrlsFromEmail: (
likeQuery: string[],
rankingAlgorithm: string,
limit: number,
offset: number,
queryState: string[],
queryFile: boolean[],
) => Promise<UrlDirectoryPaginated>

/**
* Search results base on keywords.
* @param {string} urlVector Vectorised search expression.
* @param {string} rankingAlgorithm Sort order.
* @param {number} limit Number of results returned.
* @param {number} offset Number of results skipped.
* @param {string} query Search query to be vectorised.
* @param {string} queryState States to retrieve for SQL query.
* @param {string} queryFile Url types to retrieve for SQL query.
* @returns Promise that returns list of longUrl and count.
*/
getRelevantUrlsFromText: (
urlVector: string,
rankingAlgorithm: string,
limit: number,
offset: number,
query: string,
queryState: string,
queryFile: string,
rawDirectorySearch: (
conditions: DirectoryQueryConditions,
) => Promise<UrlDirectoryPaginated>
}
Loading

0 comments on commit c9ec551

Please sign in to comment.