Skip to content

Commit

Permalink
CR fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
ikprk committed Jul 3, 2024
1 parent c42d1ac commit 7945106
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 140 deletions.
16 changes: 0 additions & 16 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@
"patch-package": "^6.5.0",
"pg": "8.8.0",
"swagger-ui-express": "^4.6.2",
"tinyld": "^1.3.4",
"type-graphql": "^1.2.0-rc.1",
"typeorm": "^0.3.11",
"ua-parser-js": "^1.0.34",
Expand All @@ -106,8 +105,8 @@
"@subsquid/substrate-typegen": "^2.1.0",
"@subsquid/typeorm-codegen": "0.3.1",
"@types/async-lock": "^1.1.3",
"@types/chai": "^4.3.11",
"@types/big-json": "^3.2.4",
"@types/chai": "^4.3.11",
"@types/cookie-parser": "^1.4.3",
"@types/express-rate-limit": "^6.0.0",
"@types/mocha": "^10.0.1",
Expand Down
19 changes: 2 additions & 17 deletions src/mappings/content/video.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@ import {
VideoPosted,
VideoViewEvent,
} from '../../model'
import { VIDEO_ORION_LANGUAGE_CURSOR_NAME } from '../../utils/customMigrations/setOrionLanguageProvider'
import { EventHandlerContext } from '../../utils/events'
import { predictVideoLanguage } from '../../utils/language'
import { OrionVideoLanguageManager } from '../../utils/OrionVideoLanguageManager'
import {
deserializeMetadata,
genericEventFields,
Expand Down Expand Up @@ -125,12 +122,7 @@ export async function processVideoCreatedEvent({
}
}

video.orionLanguage = VIDEO_ORION_LANGUAGE_CURSOR_NAME
? null
: predictVideoLanguage({
title: video.title ?? '',
description: video.description ?? '',
})
video.orionLanguage = null

channel.totalVideosCreated += 1

Expand Down Expand Up @@ -197,14 +189,7 @@ export async function processVideoUpdatedEvent({
)
}

if (VIDEO_ORION_LANGUAGE_CURSOR_NAME) {
orionVideoLanguageManager.scheduleVideoForDetection(video.id)
} else {
video.orionLanguage = predictVideoLanguage({
title: video.title ?? '',
description: video.description ?? '',
})
}
orionVideoLanguageManager.scheduleVideoForDetection(video.id)

if (autoIssueNft) {
await processNft(overlay, block, indexInBlock, extrinsicHash, video, contentActor, autoIssueNft)
Expand Down
4 changes: 3 additions & 1 deletion src/utils/OrionVideoLanguageManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ export class OrionVideoLanguageManager {
private async updateLoop(intervalMs: number): Promise<void> {
const em = await globalEm
while (true) {
await this.updateScheduledVideoLanguage(em)
await this.updateScheduledVideoLanguage(em).catch((e) => {
console.log(`Updating scheduled videos Orion language with provider failed`, e)
})
await this.updateOrionVideoLanguage().catch((e) => {
console.log(`Updating Orion language with provider failed`, e)
})
Expand Down
56 changes: 0 additions & 56 deletions src/utils/customMigrations/setOrionLanguage.ts

This file was deleted.

44 changes: 24 additions & 20 deletions src/utils/customMigrations/setOrionLanguageProvider.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { EntityManager } from 'typeorm'
import { OrionOffchainCursor } from '../../model'
import { globalEm } from '../globalEm'
import { predictLanguageForArray } from '../language'
import { predictLanguageWithProvider } from '../language'

const batchSize = 5_000 // Adjust the batch size based on your database and network performance

Expand All @@ -16,7 +16,7 @@ export const VIDEO_ORION_LANGUAGE_CURSOR_NAME = 'video_orion_language'
export async function updateVideoLanguages(em: EntityManager, videos: VideoUpdateType[]) {
const mappedVideos = videos.map((video) => `${video.title} ${video.description}`)

const predictionForVideos = await predictLanguageForArray(mappedVideos)
const predictionForVideos = await predictLanguageWithProvider(mappedVideos)

const videosWithDetections = videos.map((video, index) => ({
...video,
Expand All @@ -40,32 +40,36 @@ export async function updateVideoLanguages(em: EntityManager, videos: VideoUpdat

export async function detectVideoLanguageWithProvider() {
const em: EntityManager = await globalEm
const cursorEntity: { value: string }[] = await em.query(
let cursorEntity: { value: number }[] = await em.query(
`SELECT value FROM orion_offchain_cursor WHERE cursor_name='${VIDEO_ORION_LANGUAGE_CURSOR_NAME}'`
)
const cursor = +(cursorEntity[0]?.value ?? 0)
while (true) {
const cursor = +(cursorEntity[0]?.value ?? 0)

const videos: VideoUpdateType[] = await em.query(`
const videos: VideoUpdateType[] = await em.query(`
SELECT id, title, description
FROM admin.video
ORDER BY id::INTEGER ASC
OFFSET ${cursor}
LIMIT ${batchSize}
`)
`)

if (!videos.length) {
console.log('No more videos!')
return
}
await updateVideoLanguages(em, videos)
const newCursor = new OrionOffchainCursor({
cursorName: VIDEO_ORION_LANGUAGE_CURSOR_NAME,
value: cursor + Math.min(batchSize, videos.length),
})
await em.save(newCursor)
console.log(
`Updated languages for videos in range ${cursor}-${cursor + Math.min(batchSize, videos.length)}`
)
if (!videos.length) {
console.log('No more videos!')
break
}

await detectVideoLanguageWithProvider()
await updateVideoLanguages(em, videos)
const newCursor = new OrionOffchainCursor({
cursorName: VIDEO_ORION_LANGUAGE_CURSOR_NAME,
value: cursor + Math.min(batchSize, videos.length),
})
await em.save(newCursor)
cursorEntity = [newCursor]
console.log(
`Updated languages for videos in range ${cursor}-${
cursor + Math.min(batchSize, videos.length)
}`
)
}
}
31 changes: 3 additions & 28 deletions src/utils/language.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { detectAll } from 'tinyld'
import DetectLanguage from 'detectlanguage'

const languageDetectionApiKey = process.env.DETECTLANGUAGE_API_KEY
Expand All @@ -15,32 +14,8 @@ function cleanString(input: string): string {
return cleanedString
}

function predictLanguage(text: string): { lang: string; accuracy: number } | undefined {
const cleanedText = cleanString(text)

// Get the most accurate language prediction
return detectAll(cleanedText)?.[0]
}

export async function predictLanguageForArray(texts: string[]) {
const result = await languageDetectionInstace.detect(texts)
export async function predictLanguageWithProvider(texts: string[]) {
const cleanedTexts = texts.map(cleanString)
const result = await languageDetectionInstace.detect(cleanedTexts)
return result.map((row) => row[0].language)
}

export function predictVideoLanguage({ title, description }: any): string | undefined {
let detectedLang: string | undefined

const titleLang = predictLanguage(title ?? '')

detectedLang = titleLang?.lang

if ((titleLang?.accuracy || 0) < 0.5) {
const titleAndDescriptionLang = predictLanguage(`${title} ${description}`)
if ((titleAndDescriptionLang?.accuracy || 0) > (titleLang?.accuracy || 0)) {
// then
detectedLang = titleAndDescriptionLang?.lang
}
}

return detectedLang
}

0 comments on commit 7945106

Please sign in to comment.