Skip to content

Commit

Permalink
fix: fix callbackQuery whitelist judgment error
Browse files Browse the repository at this point in the history
chore: Extract overall query logic

feat: add support for the 4o-audio-preview model (due to SDK limitations, only text output is currently possible); ffmpeg processing of audio is required, so this is only effective in the Node.js environment
  • Loading branch information
adolphnov committed Nov 27, 2024
1 parent 0bcfaca commit 166dab3
Show file tree
Hide file tree
Showing 24 changed files with 23,991 additions and 996 deletions.
7 changes: 4 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
FROM node:alpine as DEV
FROM node:alpine AS DEV

WORKDIR /app
COPY package.json vite.config.ts tsconfig.json ./
COPY src ./src
RUN npm install && npm run build:local

FROM node:alpine as PROD
FROM node:alpine AS PROD

WORKDIR /app
COPY --from=DEV /app/dist/index.js /app/dist/index.js
COPY --from=DEV /app/package.json /app/
RUN npm install --only=production --omit=dev && \
apk add --no-cache sqlite
npm cache clean --force && \
apk add --no-cache sqlite ffmpeg
EXPOSE 8787
CMD ["npm", "run", "start:dist"]
1 change: 1 addition & 0 deletions dist/buildinfo.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22,797 changes: 22,797 additions & 0 deletions dist/index.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions dist/timestamp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 23 additions & 21 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "chatgpt-telegram-workers",
"type": "module",
"version": "2.0.3",
"version": "2.0.4",
"description": "The easiest and quickest way to deploy your own ChatGPT Telegram bot is to use a single file and simply copy and paste it. There is no need for any dependencies, local development environment configuration, domain names, or servers.",
"author": "tbxark <tbxark@outlook.com>",
"license": "MIT",
Expand Down Expand Up @@ -41,54 +41,56 @@
},
"dependencies": {
"@ai-sdk/anthropic": "^1.0.2",
"@ai-sdk/azure": "^1.0.5",
"@ai-sdk/azure": "^1.0.7",
"@ai-sdk/cohere": "^1.0.3",
"@ai-sdk/google": "^1.0.3",
"@ai-sdk/google-vertex": "^1.0.3",
"@ai-sdk/google": "^1.0.4",
"@ai-sdk/google-vertex": "^1.0.4",
"@ai-sdk/mistral": "^1.0.3",
"@ai-sdk/openai": "^1.0.4",
"@ai-sdk/openai": "^1.0.5",
"@ai-sdk/xai": "^1.0.3",
"ai": "^4.0.3",
"ai": "^4.0.6",
"base64-stream": "^1.0.0",
"cloudflare-worker-adapter": "^1.3.4",
"node-cron": "^3.0.3",
"ws": "^8.18.0"
"fluent-ffmpeg": "^2.1.3",
"node-cron": "^3.0.3"
},
"devDependencies": {
"@ai-sdk/anthropic": "^1.0.2",
"@ai-sdk/azure": "^1.0.5",
"@ai-sdk/azure": "^1.0.7",
"@ai-sdk/cohere": "^1.0.3",
"@ai-sdk/google": "^1.0.3",
"@ai-sdk/google-vertex": "^1.0.3",
"@ai-sdk/google": "^1.0.4",
"@ai-sdk/google-vertex": "^1.0.4",
"@ai-sdk/mistral": "^1.0.3",
"@ai-sdk/openai": "^1.0.4",
"@antfu/eslint-config": "^3.9.2",
"@ai-sdk/openai": "^1.0.5",
"@antfu/eslint-config": "^3.11.0",
"@cloudflare/workers-types": "^4.20241112.0",
"@google-cloud/vertexai": "^1.9.0",
"@navetacandra/ddg": "^0.0.6",
"@rollup/plugin-node-resolve": "^15.3.0",
"@types/base64-stream": "^1.0.5",
"@types/fluent-ffmpeg": "^2.1.27",
"@types/node": "^22.9.3",
"@types/node": "^22.10.0",
"@types/node-cron": "^3.0.11",
"@types/react": "^18.3.12",
"@types/react-dom": "^18.3.1",
"@types/ws": "^8.5.13",
"@vercel/node": "^3.2.26",
"ai": "^4.0.3",
"@vercel/node": "^3.2.27",
"ai": "^4.0.6",
"base64-stream": "^1.0.0",
"eslint": "^9.15.0",
"eslint-plugin-format": "^0.1.2",
"fluent-ffmpeg": "^2.1.3",
"gts": "^6.0.2",
"openai": "^4.73.0",
"openai": "^4.73.1",
"react-dom": "^18.3.1",
"rollup-plugin-cleanup": "^3.2.1",
"rollup-plugin-node-externals": "^7.1.3",
"telegram-bot-api-types": "^7.11.0",
"telegram-bot-api-types": "^8.0.0",
"tsx": "^4.19.2",
"typescript": "^5.7.2",
"vite": "^5.4.11",
"vite": "^6.0.1",
"vite-plugin-checker": "^0.8.0",
"vite-plugin-dts": "^4.3.0",
"wrangler": "^3.90.0",
"wrangler": "^3.91.0",
"ws": "^8.18.0"
}
}
15 changes: 8 additions & 7 deletions scripts/plugins/docker/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,18 @@ const packageJson = `
},
"dependencies": {
"@ai-sdk/anthropic": "^1.0.2",
"@ai-sdk/azure": "^1.0.5",
"@ai-sdk/azure": "^1.0.7",
"@ai-sdk/cohere": "^1.0.3",
"@ai-sdk/google": "^1.0.3",
"@ai-sdk/google-vertex": "^1.0.3",
"@ai-sdk/google": "^1.0.4",
"@ai-sdk/google-vertex": "^1.0.4",
"@ai-sdk/mistral": "^1.0.3",
"@ai-sdk/openai": "^1.0.4",
"@ai-sdk/openai": "^1.0.5",
"@ai-sdk/xai": "^1.0.3",
"ai": "^4.0.3",
"ai": "^4.0.6",
"base64-stream": "^1.0.0",
"cloudflare-worker-adapter": "^1.3.4",
"node-cron": "^3.0.3",
"ws": "^8.18.0"
"fluent-ffmpeg": "^2.1.3",
"node-cron": "^3.0.3"
},
"devDependencies": {}
}
Expand Down
2 changes: 2 additions & 0 deletions src/agent/model_middleware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export function AIMiddleware({ config, tools, activeTools, onStream, toolChoice,
// await warpModel(model, config, activeTools, (params.mode as any).toolChoice, chatModel);
recordModelLog(config, model, activeTools, (params.mode as any).toolChoice);
const result = await doGenerate();
log.debug(`doGenerate result: ${JSON.stringify(result)}`);
return result;
},

Expand Down Expand Up @@ -54,6 +55,7 @@ export function AIMiddleware({ config, tools, activeTools, onStream, toolChoice,

onChunk: (data: any) => {
const { chunk } = data;
log.debug(`chunk: ${JSON.stringify(chunk)}`);
if (chunk.type === 'tool-call' && !sendToolCall) {
onStream?.send(`${messageReferencer.join('')}...\n` + `tool call will start: ${chunk.toolName}`);
sendToolCall = true;
Expand Down
19 changes: 16 additions & 3 deletions src/agent/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,15 @@ export class OpenAI extends OpenAIBase implements ChatAgent {
};

readonly model = (ctx: AgentUserConfig, params?: LLMChatRequestParams): string => {
return Array.isArray(params?.content) ? ctx.OPENAI_VISION_MODEL : ctx.OPENAI_CHAT_MODEL;
const msgType = Array.isArray(params?.content) ? params.content.at(-1)?.type : 'text';
switch (msgType) {
case 'image':
return ctx.OPENAI_VISION_MODEL;
case 'file':
return 'gpt-4o-audio-preview';
default:
return ctx.OPENAI_CHAT_MODEL;
}
};

readonly request = async (params: LLMChatParams, context: AgentUserConfig, onStream: ChatStreamTextHandler | null): Promise<{ messages: ResponseMessage[]; content: string }> => {
Expand All @@ -35,6 +43,7 @@ export class OpenAI extends OpenAIBase implements ChatAgent {
baseURL: context.OPENAI_API_BASE,
apiKey: this.apikey(context),
compatibility: 'strict',
// fetch: this.fetch,
});

const languageModelV1 = provider.languageModel(originalModel, undefined);
Expand Down Expand Up @@ -73,8 +82,12 @@ export class OpenAI extends OpenAIBase implements ChatAgent {

readonly fetch = async (url: RequestInfo | URL, options?: RequestInit): Promise<Response> => {
const body = JSON.parse(options?.body as string);
if (body?.model.startsWith(OpenAI.transformModelPerfix)) {
body.model = body.model.slice(OpenAI.transformModelPerfix.length);
// if (body?.model.startsWith(OpenAI.transformModelPerfix)) {
// body.model = body.model.slice(OpenAI.transformModelPerfix.length);
// }
if (body.model === 'gpt-4o-audio-preview') {
body.modalities = ['text', 'audio'];
body.audio = { voice: 'alloy', format: 'opus' };
}
return fetch(url, {
...options,
Expand Down
2 changes: 1 addition & 1 deletion src/agent/request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ export async function requestChatCompletionsV2(params: { model: LanguageModelV1;
activeTools: params.activeTools,
onStepFinish: middleware.onStepFinish as (data: StepResult<any>) => void,
};
if (onStream !== null) {
if (onStream !== null /* && params.model.modelId !== 'gpt-4o-audio-preview' */) {
const stream = streamText({
...hander_params,
onChunk: middleware.onChunk as (data: any) => void,
Expand Down
12 changes: 6 additions & 6 deletions src/config/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -382,13 +382,13 @@ export class ExtraUserConfig {
RERANK_MODELS: string[] = ['gpt-4o-mini', 'gpt-4o-2024-05-13', 'gpt-4o-2024-08-06', 'chatgpt-4o-latest', 'o1-mini', 'o1-preview', 'claude-3-5-sonnet-20240620', 'claude-3-5-sonnet-20241012', 'gemini-1.5-flash-002', 'gemini-1.5-pro-002', 'gemini-1.5-flash-latest', 'gemini-1.5-pro-latest', 'gemini-exp-1114', 'grok-beta', 'grok-vision-beta', 'claude-3-5-haiku-20241012'];
// Whether to enable intelligent model processing
ENABLE_INTELLIGENT_MODEL = false;
// text handle type, to asr or or just 'text' to chat with llm
TEXT_HANDLE_TYPE = 'text';
// Text output type, 'audio' or 'text'
// text handle type, to asr or 'text' to chat with llm, or 'chat' by using audio-preview (default: text)
TEXT_HANDLE_TYPE = 'chat';
// Text output type, 'audio' or 'text' (default: text)
TEXT_OUTPUT = 'text';
// Audio handle type, 'trans' or just 'audio' to chat with llm
AUDIO_HANDLE_TYPE = 'trans';
// Audio output type, 'audio' or 'text'
// Audio handle type, 'trans' or 'audio' to chat with llm, or 'chat' by using audio-preview (default: trans)
AUDIO_HANDLE_TYPE = 'chat';
// Audio output type, 'audio' or 'text' (default: text)
AUDIO_OUTPUT = 'text';
// Audio contains text
AUDIO_CONTAINS_TEXT = true;
Expand Down
86 changes: 0 additions & 86 deletions src/config/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,89 +140,3 @@ export class WorkerContext implements WorkerContextBase {
return new WorkerContext(USER_CONFIG, SHARE_CONTEXT, MIDDLE_CONTEXT);
}
}

export class CallbackQueryContext {
data: string;
query_id: string;
from: Telegram.User;
USER_CONFIG: AgentUserConfig;
SHARE_CONTEXT: ShareContext;

constructor(callbackQuery: Telegram.CallbackQuery, workContext: WorkerContext) {
this.data = callbackQuery.data!;
this.query_id = callbackQuery.id;
this.from = callbackQuery.from!;
this.USER_CONFIG = workContext.USER_CONFIG;
this.SHARE_CONTEXT = workContext.SHARE_CONTEXT;
}
}

export class InlineQueryContext {
token: string;
query_id: string;
from_id: number;
chat_type: string | undefined;
query: string;

constructor(token: string, inlineQuery: Telegram.InlineQuery) {
this.token = token;
this.query_id = inlineQuery.id;
this.from_id = inlineQuery.from.id;
this.chat_type = inlineQuery.chat_type;
this.query = inlineQuery.query;
}
}

export class ChosenInlineContext {
token: string;
from_id: number;
query: string;
result_id: string;
inline_message_id: string;
constructor(token: string, choosenInlineQuery: Telegram.ChosenInlineResult) {
this.token = token;
this.from_id = choosenInlineQuery.from.id;
this.query = choosenInlineQuery.query;
this.result_id = choosenInlineQuery.result_id;
this.inline_message_id = choosenInlineQuery.inline_message_id || '';
}
}

export class ChosenInlineWorkerContext {
USER_CONFIG: AgentUserConfig;
botToken: string;
MIDDLE_CONTEXT: Record<string, any>;
SHARE_CONTEXT: Record<string, any>;
constructor(chosenInline: Telegram.ChosenInlineResult, token: string, USER_CONFIG: AgentUserConfig) {
this.USER_CONFIG = USER_CONFIG;
this.botToken = token;
// 模拟私聊消息
this.MIDDLE_CONTEXT = {
originalMessageInfo: { type: 'text' },
};
this.SHARE_CONTEXT = {
botName: 'AI',
telegraphAccessTokenKey: `telegraph_access_token:${chosenInline.from.id}`,
};
}

static async from(token: string, chosenInline: Telegram.ChosenInlineResult): Promise<ChosenInlineWorkerContext> {
const USER_CONFIG = { ...ENV.USER_CONFIG };
// Same as private chat
let userConfigKey = `user_config:${chosenInline.from.id}`;
const botId = Number.parseInt(token.split(':')[0]);
if (botId) {
userConfigKey += `:${botId}`;
}
try {
const userConfig: AgentUserConfig = JSON.parse(await ENV.DATABASE.get(userConfigKey));
ConfigMerger.merge(USER_CONFIG, ConfigMerger.trim(userConfig, ENV.LOCK_USER_CONFIG_KEYS) || {});
USER_CONFIG.ENABLE_SHOWINFO = ENV.INLINE_QUERY_SHOW_INFO;
// 过于频繁的请求不会被Telegram接受
ENV.TELEGRAM_MIN_STREAM_INTERVAL = ENV.INLINE_QUERY_SEND_INTERVAL;
} catch (e) {
console.warn(e);
}
return new ChosenInlineWorkerContext(chosenInline, token, USER_CONFIG);
}
}
2 changes: 1 addition & 1 deletion src/config/merger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ export class ConfigMerger {
if (!sourceKeys.has(key)) {
continue;
}
if (exclude && exclude.includes(key)) {
if (exclude?.includes(key)) {
continue;
}
// 默认为字符串类型
Expand Down
18 changes: 15 additions & 3 deletions src/telegram/handler/chat.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/* eslint-disable unused-imports/no-unused-vars */
import type { FilePart, TextPart, ToolResultPart } from 'ai';
import type { ReadableStream as WebReadableStream } from 'node:stream/web';
import type * as Telegram from 'telegram-bot-api-types';
import type { ChatStreamTextHandler, HistoryModifier, ImageResult, LLMChatRequestParams } from '../../agent/types';
import type { WorkerContext } from '../../config/context';
Expand All @@ -15,6 +16,7 @@ import { clearLog, getLog, logSingleton } from '../../log/logDecortor';
import { log } from '../../log/logger';
import { sendToolResult } from '../../tools';
import { imageToBase64String } from '../../utils/image';
import { OggToMp3Converter } from '../../utils/others/audio';
import { createTelegramBotAPI } from '../api';
import { escape } from '../utils/md2tgmd';
import { MessageSender, sendAction, TelegraphSender } from '../utils/send';
Expand Down Expand Up @@ -139,10 +141,19 @@ export class ChatHandler implements MessageHandler<WorkerContext> {
});
}
} else if (type === 'audio' || type === 'voice') {
const isChat = context.USER_CONFIG.AUDIO_HANDLE_TYPE === 'chat';
let audioData = urls[0];
if (isChat) {
const response = await fetch(urls[0]);
if (!response.body) {
throw new Error('Failed to fetch audio data');
}
audioData = await new OggToMp3Converter(response.body as WebReadableStream, 'base64').convert() as string;
}
params.content.push({
type: 'file',
data: urls[0],
mimeType: 'audio/ogg',
data: audioData,
mimeType: 'audio/mpeg',
});
}
}
Expand Down Expand Up @@ -288,12 +299,13 @@ function workflowHandlers(type: string): WorkflowHandler {
case 'image:text':
case 'photo:text':
case 'text:chat':
case 'chat:text':
case 'chat:audio':
return handleText;
case 'text:image':
return handleTextToImage;
case 'audio:text':
case 'audio:audio':
case 'audio:chat':
case 'trans:text':
case 'trans:audio':
return handleAudio;
Expand Down
Loading

0 comments on commit 166dab3

Please sign in to comment.