Skip to content

Commit

Permalink
feat: improve youtube and bilibili subtitles
Browse files Browse the repository at this point in the history
  • Loading branch information
JimmyLv committed Mar 8, 2023
1 parent 2eccaa3 commit f285f4e
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 19 deletions.
18 changes: 2 additions & 16 deletions lib/bilibili/fetchBilibiliSubtitle.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { fetchBilibiliSubtitleUrls } from "~/lib/bilibili/fetchBilibiliSubtitleUrls";
import { reduceBilibiliSubtitleTimestamp } from "~/utils/reduceSubtitleTimestamp";

export async function fetchBilibiliSubtitle(videoId: string) {
// const res = await pRetry(async () => await fetchBilibiliSubtitles(videoId), {
Expand Down Expand Up @@ -26,21 +27,6 @@ export async function fetchBilibiliSubtitle(videoId: string) {

const subtitleResponse = await fetch(subtitleUrl);
const subtitles = await subtitleResponse.json();
/*{
"from": 16.669,
"to": 18.619,
"sid": 8,
"location": 2,
"content": "让ppt变得更加精彩",
"music": 0.0
},*/
const transcripts = subtitles?.body.map(
(item: { from: number; content: string }, index: number) => {
return {
text: `${item.from}: ${item.content}`,
index
};
}
);
const transcripts = reduceBilibiliSubtitleTimestamp(subtitles?.body);
return { title, subtitlesArray: transcripts, descriptionText };
}
9 changes: 7 additions & 2 deletions middleware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,13 @@ export async function middleware(req: NextRequest, context: NextFetchEvent) {
}
}

if (isDev) {
return NextResponse.next();
}
// 👇 below only works for production

if (!userKey) {
const identifier = req.ip ?? "127.0.0.10";
const identifier = req.ip ?? "127.0.0.11";
const { success, remaining } = await ratelimitForIps.limit(identifier);
console.log(
`======== ip ${identifier}, remaining: ${remaining} ========`
Expand Down Expand Up @@ -80,7 +85,7 @@ export async function middleware(req: NextRequest, context: NextFetchEvent) {
}

const result = await redis.get<string>(cacheId);
if (!isDev && result) {
if (result) {
console.log("hit cache for ", cacheId);
return NextResponse.json(result);
}
Expand Down
44 changes: 43 additions & 1 deletion utils/reduceSubtitleTimestamp.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import { CommonSubtitleItem } from "~/lib/types";

/**
 * One raw YouTube subtitle entry: a numeric `start` value plus the text
 * lines that belong to it.
 * NOTE(review): `start` is presumably a timestamp in seconds — confirm against the fetcher.
 */
export type YoutubeSubtitleItem = { start: number; lines: string[] };
/**
 * One raw Bilibili subtitle entry, reduced to the two fields this module reads.
 * Sample payload item: { "from": 16.669, "content": "make the PPT more brilliant" }
 * NOTE(review): `from` is presumably the start time in seconds — confirm against the Bilibili API.
 */
export type BilibiliSubtitleItem = { from: number; content: string };

export function reduceSubtitleTimestamp(
subtitles: Array<YoutubeSubtitleItem>
subtitles: Array<YoutubeSubtitleItem> = []
): Array<CommonSubtitleItem> {
// 把字幕数组总共分成 20 组
const TOTAL_GROUP_COUNT = 20;
Expand Down Expand Up @@ -42,3 +44,43 @@ export function reduceSubtitleTimestamp(
[]
);
}
/**
 * Collapses a list of Bilibili subtitle items into a smaller list of merged,
 * timestamp-prefixed text groups.
 *
 * When there are more than 20 items, the list is divided into 20 groups
 * (the group size is `length / 20` and may be fractional). Otherwise every
 * 3 consecutive items are merged into one group.
 *
 * Each group's `text` has the form `"<from>: <content> <content> ... "`,
 * where `<from>` is the unrounded `from` value of the group's first item
 * and every content is followed by a single space.
 *
 * @param subtitles - Raw subtitle items; defaults to an empty array.
 * @returns One entry per group, with `index` set to the group's position.
 */
export function reduceBilibiliSubtitleTimestamp(
  subtitles: Array<BilibiliSubtitleItem> = []
): Array<CommonSubtitleItem> {
  // Target number of groups for long subtitle lists.
  const TOTAL_GROUP_COUNT = 20;
  // Group size used when the list is too short to fill all groups.
  const MINIMUM_COUNT_ONE_GROUP = 3;

  let itemsPerGroup = MINIMUM_COUNT_ONE_GROUP;
  if (subtitles.length > TOTAL_GROUP_COUNT) {
    itemsPerGroup = subtitles.length / TOTAL_GROUP_COUNT;
  }

  const groups: CommonSubtitleItem[] = [];

  subtitles.forEach((item, position) => {
    // Which group this item falls into.
    const slot = Math.floor(position / itemsPerGroup);

    // First item of a group: seed the text with its timestamp prefix.
    if (!groups[slot]) {
      groups[slot] = { text: item.from + ": ", index: slot };
    }

    // Append this item's content, followed by a separating space.
    groups[slot].text += item.content + " ";
  });

  return groups;
}

1 comment on commit f285f4e

@vercel
Copy link

@vercel vercel bot commented on f285f4e Mar 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.