diff --git a/src/docs.ts b/src/docs.ts deleted file mode 100644 index 5a600ba..0000000 --- a/src/docs.ts +++ /dev/null @@ -1,227 +0,0 @@ -import { SupabaseClient } from '@supabase/supabase-js'; -import { BgentRuntime, addLore, wait } from 'bgent'; -import { Octokit } from 'octokit'; - -export interface ProcessDocsParams { - supabase: SupabaseClient; - octokit: Octokit; - repoOwner: string; - repoName: string; - pathToRepoDocuments: string; - documentationFileExt: string; - sectionDelimiter: string; - sourceDocumentationUrl: string; - env: { [key: string]: string }; -} - -/** - * Splits a document into logical sections by a delimiter. - * Currently only works for Markdown (.MD) files. - * @param {string} documentContent - The content of the file. - * @param {string} sectionDelimiter - Character sequence to sectionize the file content. - * @returns {object} - The document sections (`sections`) and documentation URL (`url`). - */ -function sectionizeDocument(documentContent: string, sectionDelimiter: string) { - // Retrieve YAML header and extract out documentation url path. - const yamlHeader = documentContent.match(/---\n([\s\S]+?)\n---/); - - // Split the remaining content into sections based on the YAML header and delimiter. - const delim = new RegExp(`\\n+${sectionDelimiter}+\\s+`); - const sections = documentContent - .replace(yamlHeader ? yamlHeader[0] : '', '') - .split(delim); - - // Debug - //printSectionizedDocument(sections); - - return { sections: sections }; -} - -/** - * Retrieves, processes, and stores all documents on a GitHub repository to a - * pgvector in Supabase. Currently only supports Markdown (.MD) files. - * @param {ProcessDocsParams} params - An object that conforms to the ProcessDocsParams interface. - */ -async function makeRequest( - octokit: Octokit, - requestOptions: { - method: string; - url: string; - owner: string; - repo: string; - path?: string; - headers: - | { 'X-GitHub-Api-Version': string } - | { 'X-GitHub-Api-Version': string } - | { 'X-GitHub-Api-Version': string } - | { 'X-GitHub-Api-Version': string }; - pull_number?: number; - per_page?: number; - page?: number; - }, -) { - try { - const response = await octokit.request(requestOptions); - return response; - // @ts-expect-error - weird error - } catch (error: { status: number; headers: { [x: string]: string } }) { - if ( - error.status === 403 && - error.headers['x-ratelimit-remaining'] === '0' - ) { - const retryAfter = - parseInt(error.headers['x-ratelimit-reset'], 10) - - Math.floor(Date.now() / 1000); - console.log(`Rate limited. Retrying in ${retryAfter} seconds...`); - await new Promise((resolve) => setTimeout(resolve, retryAfter * 1000)); - return makeRequest(octokit, requestOptions); - } else { - throw error; - } - } -} - -export async function vectorizeDocuments(params: ProcessDocsParams) { - try { - const { - supabase, - octokit, - repoOwner, - repoName, - pathToRepoDocuments, - documentationFileExt, - sectionDelimiter, - sourceDocumentationUrl, - env, - } = params; - - // Fetch the documentation directories or files. - let response = await makeRequest(octokit, { - method: 'GET', - url: '/repos/{owner}/{repo}/contents/{path}', - owner: repoOwner, - repo: repoName, - path: pathToRepoDocuments, - headers: { - 'X-GitHub-Api-Version': '2022-11-28', - }, - }); - - response.data = Array.isArray(response.data) - ? response.data - : [response.data]; - - // Process documents in each directory. - for (const resData of response.data) { - let dirDocuments = []; - if (resData.type == 'dir') { - console.log('requesting dir: ', resData.name); - // Fetch all files from the directory. - response = await makeRequest(octokit, { - method: 'GET', - url: '/repos/{owner}/{repo}/contents/{path}', - owner: repoOwner, - repo: repoName, - path: pathToRepoDocuments + '/' + resData.name, - headers: { - 'X-GitHub-Api-Version': '2022-11-28', - }, - }); - - const documentsArray = response.data as { - name: string; - path: string; - }[]; - dirDocuments = documentsArray.filter((document) => - document.name.endsWith(`.${documentationFileExt}`), - ); - } else if (resData.type == 'file') { - dirDocuments = [resData]; - } else { - throw new Error('Repository URL does not exist!'); - } - - // Retrieve and process document data for each document sequentially. - for (const document of dirDocuments) { - console.log('requesting doc: ', document.path); - const contentResponse = await makeRequest(octokit, { - method: 'GET', - url: '/repos/{owner}/{repo}/contents/{path}', - owner: repoOwner, - repo: repoName, - path: document.path, - headers: { - 'X-GitHub-Api-Version': '2022-11-28', - }, - }); - - const decodedContent = Buffer.from( - (contentResponse.data as { content: string }).content, - 'base64', - ).toString('utf-8'); - const { sections } = sectionizeDocument( - decodedContent, - sectionDelimiter, - ); - const updatedPath = document.path.replace('docs/', ''); - const runtime = new BgentRuntime({ - debugMode: true, - serverUrl: 'https://api.openai.com/v1', - supabase: supabase, - token: env.OPENAI_API_KEY, - evaluators: [], - actions: [wait], - }); - for (const document of sections) { - await addLore({ - runtime, - content: { content: document }, - source: sourceDocumentationUrl + updatedPath, - }); - } - } - // wait 200 ms - await new Promise((resolve) => setTimeout(resolve, 200)); - } - } catch (error) { - console.error('Error fetching data from GitHub API:', error); - } -} - -export async function fetchLatestPullRequest( - params: ProcessDocsParams, - pullRequestNum: string, -) { - try { - const { octokit, repoOwner, repoName, pathToRepoDocuments } = params; - - const page = 1; - - const response = await makeRequest(octokit, { - method: 'GET', - url: '/repos/{owner}/{repo}/pulls/{pull_number}/files', - owner: repoOwner, - repo: repoName, - pull_number: parseInt(pullRequestNum), - per_page: 100, - page: page, - headers: { - 'X-GitHub-Api-Version': '2022-11-28', - }, - }); - - // Iterate over each file path sequentially - for (const filePath of response.data) { - if (filePath.filename.includes(`${pathToRepoDocuments}/`)) { - params.pathToRepoDocuments = filePath.filename as string; - await vectorizeDocuments(params); // Process each document one by one - // wait 200 ms - await new Promise((resolve) => setTimeout(resolve, 200)); - } - } - // wait 200 ms - await new Promise((resolve) => setTimeout(resolve, 200)); - } catch (error) { - console.error('Error fetching data from GitHub API:', error); - } -} diff --git a/src/index.ts b/src/index.ts index 7db2a31..2e4170c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -355,6 +355,7 @@ router.get('/refresh-docs', async (request, _env) => { }); router.post('/vectorize-document', async (request, env) => { + console.log('received request to vectorize-document'); const { id, sourceUrl } = (await request.json()) as { id: string; sourceUrl: string; @@ -363,6 +364,7 @@ router.post('/vectorize-document', async (request, env) => { const processDocsParams = await initializeSupabaseAndOpenAIVariable(env); try { + console.log('processing document:', sourceUrl); // Fetch the document content from GitHub const response = await processDocsParams.octokit.request( 'GET /repos/{owner}/{repo}/contents/{path}', @@ -397,6 +399,7 @@ router.post('/vectorize-document', async (request, env) => { }); for (const section of sections) { + console.log('vectorizing section:', section); await addLore({ runtime, content: { content: section }, @@ -423,14 +426,16 @@ router.post('/vectorize-file', async (request, env) => { const { octokit, repoOwner, repoName } = await initializeSupabaseAndOpenAIVariable(env); - const { filePath, sectionDelimiter, sourceDocumentationUrl } = - (await request.json()) as { - filePath: string; - sectionDelimiter: string; - sourceDocumentationUrl: string; - }; + console.log('received request to vectorize-file'); try { + const { filePath, sectionDelimiter, sourceDocumentationUrl } = + (await request.json()) as { + filePath: string; + sectionDelimiter: string; + sourceDocumentationUrl: string; + }; + const contentResponse = await makeRequest(octokit, { method: 'GET', url: '/repos/{owner}/{repo}/contents/{path}', @@ -477,6 +482,7 @@ router.post('/vectorize-file', async (request, env) => { router.post('/vectorize-directory', async (request, env) => { const { octokit, repoOwner, repoName } = await initializeSupabaseAndOpenAIVariable(env); + console.log('received request to vectorize-directory'); const { directoryPath, @@ -509,10 +515,11 @@ router.post('/vectorize-directory', async (request, env) => { const dirDocuments = documentsArray.filter((document) => document.name.endsWith(`.${documentationFileExt}`), ); - + console.log('dirDocuments', dirDocuments); // Make requests to the /vectorize-file route for each file in the directory for (const document of dirDocuments) { - await fetch(`${env.WORKER_URL}/vectorize-file`, { + console.log('requesting file: ', document.path); + await env.afbot.fetch(`${env.WORKER_URL}/vectorize-file`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ @@ -801,7 +808,9 @@ async function initializeSupabaseAndOpenAIVariable(env: { return { supabase: supabase, - octokit: new Octokit({ auth: env.GITHUB_AUTH_TOKEN }), + octokit: new Octokit({ + auth: env.GITHUB_AUTH_TOKEN, + }), repoOwner: process.env.REPO_OWNER ?? 'aframevr', repoName: process.env.REPO_NAME ?? 'aframe', pathToRepoDocuments: 'docs', @@ -872,11 +881,7 @@ async function makeRequest( owner: string; repo: string; path?: string; - headers: - | { 'X-GitHub-Api-Version': string } - | { 'X-GitHub-Api-Version': string } - | { 'X-GitHub-Api-Version': string } - | { 'X-GitHub-Api-Version': string }; + headers: { 'X-GitHub-Api-Version': string }; pull_number?: number; per_page?: number; page?: number; @@ -885,8 +890,11 @@ async function makeRequest( try { const response = await octokit.request(requestOptions); return response; - // @ts-expect-error - weird error - } catch (error: { status: number; headers: { [x: string]: string } }) { + } catch (_error: unknown) { + const error = _error as { + status: number; + headers: { [x: string]: string }; + }; if ( error.status === 403 && error.headers['x-ratelimit-remaining'] === '0' @@ -904,6 +912,7 @@ async function makeRequest( } export async function vectorizeDocuments(params: ProcessDocsParams) { + console.log('vectorizing docs'); try { const { octokit, @@ -937,26 +946,56 @@ export async function vectorizeDocuments(params: ProcessDocsParams) { if (resData.type === 'dir') { console.log('requesting dir: ', resData.name); console.log(`${env.WORKER_URL}/vectorize-directory`); - await fetch(`${env.WORKER_URL}/vectorize-directory`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - directoryPath: pathToRepoDocuments + '/' + resData.name, - documentationFileExt, - sectionDelimiter, - sourceDocumentationUrl, - }), - }); + + // @ts-expect-error - This is a valid fetch response + const response = await env.afbot.fetch( + `${env.WORKER_URL}/vectorize-directory`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + directoryPath: pathToRepoDocuments + '/' + resData.name, + documentationFileExt, + sectionDelimiter, + sourceDocumentationUrl, + }), + }, + ); + // check if response is ok + if (!response.ok) { + console.error('Error vectorizing directory', { + // what was the target url + responseUrl: response.url, + responseStatusText: response.statusText, + responseStatus: response.status, + responseText: await response.text(), + }); + throw new Error('Error vectorizing directory'); + } else { + console.log('response is ok'); + } } else if (resData.type === 'file') { - await fetch(`${env.WORKER_URL}/vectorize-file`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - filePath: resData.path, - sectionDelimiter, - sourceDocumentationUrl, - }), - }); + // @ts-expect-error - This is a valid fetch response + const response = await env.afbot.fetch( + `${env.WORKER_URL}/vectorize-file`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + filePath: resData.path, + sectionDelimiter, + sourceDocumentationUrl, + }), + }, + ); + if (!response.ok) { + // log the error itself + const error = await response.text(); + console.error('Error vectorizing file:', error); + throw new Error('Error vectorizing file' + error); + } else { + console.log('response is ok'); + } } else { throw new Error('Repository URL does not exist!'); } @@ -991,15 +1030,19 @@ export async function fetchLatestPullRequest( // Iterate over each file path sequentially for (const filePath of response.data) { if (filePath.filename.includes(`${pathToRepoDocuments}/`)) { - await fetch(`${params.env.WORKER_URL}/vectorize-file`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - filePath: filePath.filename, - sectionDelimiter: params.sectionDelimiter, - sourceDocumentationUrl: params.sourceDocumentationUrl, - }), - }); + // @ts-expect-error - This is a valid fetch response + await params.env.afbot.fetch( + `${params.env.WORKER_URL}/vectorize-file`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + filePath: filePath.filename, + sectionDelimiter: params.sectionDelimiter, + sourceDocumentationUrl: params.sourceDocumentationUrl, + }), + }, + ); } } } catch (error) { diff --git a/wrangler.toml b/wrangler.toml index 88f90ae..66ca3c4 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -4,4 +4,8 @@ compatibility_date = "2023-11-21" node_compat = true [dev] -port = 8787 \ No newline at end of file +port = 8787 + +[[services]] +binding = "afbot" +service = "afbot" \ No newline at end of file