From fa01d83031486272924ed86ea99b533648569c70 Mon Sep 17 00:00:00 2001
From: Ritesh Ghosh
Date: Tue, 6 Aug 2024 00:07:13 +0530
Subject: [PATCH] feat: add scraper extractors

---
 src/extractors/index.ts      |   6 +
 src/extractors/megacloud.ts  | 256 +++++++++++++++++++++++++++++++++++
 src/extractors/rapidcloud.ts | 183 +++++++++++++++++++++++++
 src/extractors/streamsb.ts   |  96 +++++++++++++
 src/extractors/streamtape.ts |  55 ++++++
 5 files changed, 596 insertions(+)
 create mode 100644 src/extractors/index.ts
 create mode 100644 src/extractors/megacloud.ts
 create mode 100644 src/extractors/rapidcloud.ts
 create mode 100644 src/extractors/streamsb.ts
 create mode 100644 src/extractors/streamtape.ts

diff --git a/src/extractors/index.ts b/src/extractors/index.ts
new file mode 100644
index 0000000..788dc8c
--- /dev/null
+++ b/src/extractors/index.ts
@@ -0,0 +1,6 @@
+import StreamSB from "./streamsb.js";
+import StreamTape from "./streamtape.js";
+import RapidCloud from "./rapidcloud.js";
+import MegaCloud from "./megacloud.js";
+
+export { StreamSB, StreamTape, RapidCloud, MegaCloud };
diff --git a/src/extractors/megacloud.ts b/src/extractors/megacloud.ts
new file mode 100644
index 0000000..9525ebb
--- /dev/null
+++ b/src/extractors/megacloud.ts
@@ -0,0 +1,256 @@
+import axios from "axios";
+import crypto from "crypto";
+import createHttpError from "http-errors";
+
+// https://megacloud.tv/embed-2/e-1/dBqCr5BcOhnD?k=1
+
+const megacloud = {
+  script: "https://megacloud.tv/js/player/a/prod/e1-player.min.js?v=",
+  sources: "https://megacloud.tv/embed-2/ajax/e-1/getSources?id=",
+} as const;
+
+type track = {
+  file: string;
+  kind: string;
+  label?: string;
+  default?: boolean;
+};
+
+type intro_outro = {
+  start: number;
+  end: number;
+};
+
+type unencryptedSrc = {
+  file: string;
+  type: string;
+};
+
+// Payload of the getSources endpoint as it is read by MegaCloud.extract().
+type extractedSrc = {
+  sources: string | unencryptedSrc[];
+  tracks: track[];
+  encrypted: boolean;
+  intro: intro_outro;
+  outro: intro_outro;
+  server: number;
+};
+
+interface ExtractedData
+  extends Pick<extractedSrc, "intro" | "outro" | "tracks"> {
+  sources: { url: string; type: string }[];
+}
+
+class MegaCloud {
+  // private serverName = "megacloud";
+
+  /**
+   * Fetches the source list for an embed url and returns it in plain form,
+   * decrypting it with the key hidden in the player script when needed.
+   *
+   * @param videoUrl embed url; its last path segment is used as the video id
+   * @returns tracks, intro/outro markers and the playable sources
+   * @throws when the sources or the player script can't be fetched, when no
+   *   key variables are found, or when the payload can't be decrypted
+   */
+  async extract(videoUrl: URL) {
+    const extractedData: ExtractedData = {
+      tracks: [],
+      intro: {
+        start: 0,
+        end: 0,
+      },
+      outro: {
+        start: 0,
+        end: 0,
+      },
+      sources: [],
+    };
+
+    const videoId = videoUrl?.href?.split("/")?.pop()?.split("?")[0];
+    const { data: srcsData } = await axios.get<extractedSrc>(
+      megacloud.sources.concat(videoId || ""),
+      {
+        headers: {
+          Accept: "*/*",
+          "X-Requested-With": "XMLHttpRequest",
+          "User-Agent":
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+          Referer: videoUrl.href,
+        },
+      }
+    );
+    if (!srcsData) {
+      throw createHttpError.NotFound("Url may have an invalid video id");
+    }
+
+    // console.log(JSON.stringify(srcsData, null, 2));
+
+    const encryptedString = srcsData.sources;
+    if (!srcsData.encrypted && Array.isArray(encryptedString)) {
+      extractedData.intro = srcsData.intro;
+      extractedData.outro = srcsData.outro;
+      extractedData.tracks = srcsData.tracks;
+      extractedData.sources = encryptedString.map((s) => ({
+        url: s.file,
+        type: s.type,
+      }));
+
+      return extractedData;
+    }
+
+    // the value appended to the script url is just the current timestamp
+    const { data } = await axios.get(
+      megacloud.script.concat(Date.now().toString())
+    );
+
+    const text: string = data;
+    if (!text) {
+      throw createHttpError.InternalServerError(
+        "Couldn't fetch script to decrypt resource"
+      );
+    }
+
+    const vars = this.extractVariables(text);
+    if (!vars.length) {
+      throw new Error(
+        "Can't find variables. Perhaps the extractor is outdated."
+      );
+    }
+
+    const { secret, encryptedSource } = this.getSecret(
+      encryptedString as string,
+      vars
+    );
+    try {
+      // decrypt() may throw as well (cipher errors), so it is kept in here
+      const decrypted = this.decrypt(encryptedSource, secret);
+      const sources = JSON.parse(decrypted);
+      extractedData.intro = srcsData.intro;
+      extractedData.outro = srcsData.outro;
+      extractedData.tracks = srcsData.tracks;
+      extractedData.sources = sources.map((s: any) => ({
+        url: s.file,
+        type: s.type,
+      }));
+
+      return extractedData;
+    } catch (error) {
+      throw createHttpError.InternalServerError("Failed to decrypt resource");
+    }
+  }
+
+  /**
+   * Collects the [offset, length] pairs that tell where the key characters
+   * are hidden inside the encrypted sources string.
+   *
+   * @param text source of the player script
+   * @returns one [number, number] pair per matched `case` statement
+   * @throws when a matched variable has no hex assignment in the script
+   */
+  extractVariables(text: string) {
+    // copied from github issue #30 'https://github.com/ghoshRitesh12/aniwatch-api/issues/30'
+    const regex =
+      /case\s*0x[0-9a-f]+:(?![^;]*=partKey)\s*\w+\s*=\s*(\w+)\s*,\s*\w+\s*=\s*(\w+);/g;
+
+    // parseInt() never throws and matchingKey() only returns hex digits, so
+    // no try/catch or post-filtering is needed here
+    return Array.from(text.matchAll(regex), (match) => [
+      parseInt(this.matchingKey(match[1], text), 16),
+      parseInt(this.matchingKey(match[2], text), 16),
+    ]);
+  }
+
+  /**
+   * Splits the encrypted string into the key and the remaining cipher text.
+   *
+   * @param encryptedString encrypted sources with the key characters mixed in
+   * @param values [offset, length] pairs; each offset is shifted by the sum
+   *   of the lengths consumed so far
+   * @returns the collected key (`secret`) and the string without those
+   *   characters (`encryptedSource`)
+   */
+  getSecret(encryptedString: string, values: number[][]) {
+    let secret = "",
+      encryptedSource = "",
+      encryptedSourceArray = encryptedString.split(""),
+      currentIndex = 0;
+
+    for (const index of values) {
+      const start = index[0] + currentIndex;
+      const end = start + index[1];
+
+      for (let i = start; i < end; i++) {
+        secret += encryptedString[i];
+        encryptedSourceArray[i] = "";
+      }
+      currentIndex += index[1];
+    }
+
+    encryptedSource = encryptedSourceArray.join("");
+
+    return { secret, encryptedSource };
+  }
+
+  /**
+   * Decrypts an AES-256-CBC payload.
+   *
+   * @param encrypted base64 encoded cipher text
+   * @param keyOrSecret the key when `maybe_iv` is given, otherwise a
+   *   passphrase from which key and iv are derived
+   * @param maybe_iv initialization vector, only for the explicit key form
+   * @returns the decrypted utf8 text
+   */
+  decrypt(encrypted: string, keyOrSecret: string, maybe_iv?: string) {
+    let key;
+    let iv;
+    let contents;
+    if (maybe_iv) {
+      key = keyOrSecret;
+      iv = maybe_iv;
+      contents = encrypted;
+    } else {
+      // copied from 'https://github.com/brix/crypto-js/issues/468'
+      const cypher = Buffer.from(encrypted, "base64");
+      const salt = cypher.subarray(8, 16);
+      const password = Buffer.concat([
+        Buffer.from(keyOrSecret, "binary"),
+        salt,
+      ]);
+      const md5Hashes = [];
+      let digest = password;
+      for (let i = 0; i < 3; i++) {
+        md5Hashes[i] = crypto.createHash("md5").update(digest).digest();
+        digest = Buffer.concat([md5Hashes[i], password]);
+      }
+      key = Buffer.concat([md5Hashes[0], md5Hashes[1]]);
+      iv = md5Hashes[2];
+      contents = cypher.subarray(16);
+    }
+
+    const decipher = crypto.createDecipheriv("aes-256-cbc", key, iv);
+    // final("utf8") keeps both halves strings; a bare final() returns a Buffer
+    const decrypted =
+      decipher.update(
+        contents as any,
+        typeof contents === "string" ? "base64" : undefined,
+        "utf8"
+      ) + decipher.final("utf8");
+
+    return decrypted;
+  }
+
+  // function copied from github issue #30 'https://github.com/ghoshRitesh12/aniwatch-api/issues/30'
+  // returns the hex digits (without "0x") assigned to `value` in `script`
+  matchingKey(value: string, script: string) {
+    const regex = new RegExp(`,${value}=((?:0x)?([0-9a-fA-F]+))`);
+    const match = script.match(regex);
+    if (match) {
+      return match[1].replace(/^0x/, "");
+    } else {
+      throw new Error("Failed to match the key");
+    }
+  }
+}
+
+export default MegaCloud;
diff --git a/src/extractors/rapidcloud.ts b/src/extractors/rapidcloud.ts
new file mode 100644
index 0000000..64d211e
--- /dev/null
+++ b/src/extractors/rapidcloud.ts
@@ -0,0 +1,183 @@
+import axios from "axios";
+import CryptoJS from "crypto-js";
+import { substringAfter, substringBefore } from "../utils/index.js";
+import type { Video, Subtitle, Intro } from "../types/extractor.js";
+
+type extractReturn = {
+  sources: Video[];
+  subtitles: Subtitle[];
+};
+
+// https://megacloud.tv/embed-2/e-1/IxJ7GjGVCyml?k=1
+class RapidCloud {
+  // private serverName = "RapidCloud";
+  private sources: Video[] = [];
+
+  // https://rapid-cloud.co/embed-6/eVZPDXwVfrY3?vast=1
+  private readonly fallbackKey = "c1d17096f2ca11b7";
+  private readonly host = "https://rapid-cloud.co";
+
+  /**
+   * Resolves the sources and subtitles behind an embed url.
+   *
+   * Encrypted source lists are decrypted with a key that is assembled from
+   * the payload itself, guided by the key file fetched from GitHub. For
+   * rapid-cloud.co urls every master playlist is expanded into one entry
+   * per resolution.
+   *
+   * @param videoUrl embed url; its last path segment is used as the video id
+   * @returns sources and subtitles, plus intro/outro when they end after 1
+   * @throws when a request fails or the sources can't be decrypted
+   */
+  async extract(videoUrl: URL): Promise<extractReturn> {
+    const result: extractReturn & { intro?: Intro; outro?: Intro } = {
+      sources: [],
+      subtitles: [],
+    };
+
+    try {
+      const id = videoUrl.href.split("/").pop()?.split("?")[0];
+      const options = {
+        headers: {
+          "X-Requested-With": "XMLHttpRequest",
+        },
+      };
+
+      const res = await axios.get(
+        `https://${videoUrl.hostname}/embed-2/ajax/e-1/getSources?id=${id}`,
+        options
+      );
+
+      let {
+        data: { sources, tracks, intro, outro, encrypted },
+      } = res;
+
+      let decryptKey = await (
+        await axios.get(
+          "https://raw.githubusercontent.com/cinemaxhq/keys/e1/key"
+        )
+      ).data;
+
+      // NOTE(review): this looks for GitHub page markup although the raw file
+      // is requested; presumably it yields nothing and the refetch below is
+      // what provides the key - confirm against substringAfter/substringBefore
+      decryptKey = substringBefore(
+        substringAfter(decryptKey, '"blob-code blob-code-inner js-file-line">'),
+        "</td>"
+      );
+
+      if (!decryptKey) {
+        decryptKey = await (
+          await axios.get(
+            "https://raw.githubusercontent.com/cinemaxhq/keys/e1/key"
+          )
+        ).data;
+      }
+
+      if (!decryptKey) decryptKey = this.fallbackKey;
+
+      try {
+        if (encrypted) {
+          const sourcesArray = sources.split("");
+          let extractedKey = "";
+          let currentIndex = 0;
+
+          // each entry is read as [offset, length] of a key fragment
+          for (const index of decryptKey) {
+            const start = index[0] + currentIndex;
+            const end = start + index[1];
+
+            for (let i = start; i < end; i++) {
+              extractedKey += res.data.sources[i];
+              sourcesArray[i] = "";
+            }
+            currentIndex += index[1];
+          }
+
+          decryptKey = extractedKey;
+          sources = sourcesArray.join("");
+
+          const decrypt = CryptoJS.AES.decrypt(sources, decryptKey);
+          sources = JSON.parse(decrypt.toString(CryptoJS.enc.Utf8));
+        }
+      } catch (err: any) {
+        console.log(err.message);
+        throw new Error("Cannot decrypt sources. Perhaps the key is invalid.");
+      }
+
+      this.sources = sources?.map((s: any) => ({
+        url: s.file,
+        isM3U8: s.file.includes(".m3u8"),
+      }));
+
+      result.sources.push(...this.sources);
+
+      if (videoUrl.href.includes(new URL(this.host).host)) {
+        result.sources = [];
+        this.sources = [];
+
+        for (const source of sources) {
+          const { data } = await axios.get(source.file, options);
+          const m3u8data = data
+            .split("\n")
+            .filter(
+              (line: string) =>
+                line.includes(".m3u8") && line.includes("RESOLUTION=")
+            );
+
+          const secondHalf = m3u8data.map((line: string) =>
+            line.match(/RESOLUTION=.*,(C)|URI=.*/g)?.map((s) => s.split("=")[1])
+          );
+
+          const TdArray = secondHalf.map((s: string[]) => {
+            const f1 = s[0].split(",C")[0];
+            const f2 = s[1].replace(/"/g, "");
+
+            return [f1, f2];
+          });
+
+          for (const [f1, f2] of TdArray) {
+            this.sources.push({
+              url: `${source.file?.split("master.m3u8")[0]}${f2.replace(
+                "iframes",
+                "index"
+              )}`,
+              quality: f1.split("x")[1] + "p",
+              isM3U8: f2.includes(".m3u8"),
+            });
+          }
+        }
+
+        // this.sources grows across iterations, so copy it only once after
+        // the loop; copying inside the loop repeated the earlier entries
+        result.sources.push(...this.sources);
+      }
+
+      result.intro =
+        intro?.end > 1 ? { start: intro.start, end: intro.end } : undefined;
+      result.outro =
+        outro?.end > 1 ? { start: outro.start, end: outro.end } : undefined;
+
+      result.sources.push({
+        url: sources[0].file,
+        isM3U8: sources[0].file.includes(".m3u8"),
+        quality: "auto",
+      });
+
+      result.subtitles = tracks
+        .map((s: any) =>
+          s.file
+            ? { url: s.file, lang: s.label ? s.label : "Thumbnails" }
+            : null
+        )
+        .filter((s: any) => s);
+
+      return result;
+    } catch (err: any) {
+      console.log(err.message);
+      throw err;
+    }
+  }
+}
+
+export default RapidCloud;
diff --git a/src/extractors/streamsb.ts b/src/extractors/streamsb.ts
new file mode 100644
index 0000000..cff7945
--- /dev/null
+++ b/src/extractors/streamsb.ts
@@ -0,0 +1,96 @@
+import axios from "axios";
+import type { Video } from "../types/extractor.js";
+import { USER_AGENT_HEADER } from "../utils/index.js";
+
+class StreamSB {
+  // private serverName = "streamSB";
+  private sources: Video[] = [];
+
+  private readonly host = "https://watchsb.com/sources50";
+  private readonly host2 = "https://streamsss.net/sources16";
+
+  // embeds the hex encoded video id into the fixed request path
+  private PAYLOAD(hex: string): string {
+    // `5363587530696d33443675687c7c${hex}7c7c433569475830474c497a65767c7c73747265616d7362`;
+    return `566d337678566f743674494a7c7c${hex}7c7c346b6767586d6934774855537c7c73747265616d7362/6565417268755339773461447c7c346133383438333436313335376136323337373433383634376337633465366534393338373136643732373736343735373237613763376334363733353737303533366236333463353333363534366137633763373337343732363536313664373336327c7c6b586c3163614468645a47617c7c73747265616d7362`;
+  }
+
+  /**
+   * Requests the stream data of an embed url and lists its playlists.
+   *
+   * @param videoUrl embed url; the part after "/e/" is used as the video id
+   * @param isAlt query the second host instead of the first one
+   * @returns one entry per stream variant followed by the "auto" master entry
+   * @throws when the host returns no stream data
+   */
+  async extract(videoUrl: URL, isAlt: boolean = false): Promise<Video[]> {
+    // start from an empty list so that a reused instance doesn't return the
+    // sources of earlier calls as well
+    this.sources = [];
+
+    let headers: Record<string, string> = {
+      watchsb: "sbstream",
+      Referer: videoUrl.href,
+      "User-Agent": USER_AGENT_HEADER,
+    };
+    let id = videoUrl.href.split("/e/").pop();
+    if (id?.includes("html")) {
+      id = id.split(".html")[0];
+    }
+    const bytes = new TextEncoder().encode(id);
+
+    const res = await axios
+      .get(
+        `${isAlt ? this.host2 : this.host}/${this.PAYLOAD(
+          Buffer.from(bytes).toString("hex")
+        )}`,
+        { headers }
+      )
+      .catch(() => null);
+
+    if (!res?.data.stream_data) {
+      throw new Error("No source found. Try a different server");
+    }
+
+    headers = {
+      "User-Agent": USER_AGENT_HEADER,
+      Referer: videoUrl.href.split("e/")[0],
+    };
+
+    const m3u8_urls = await axios.get(res.data.stream_data.file, {
+      headers,
+    });
+
+    const videoList = m3u8_urls?.data?.split("#EXT-X-STREAM-INF:") ?? [];
+
+    for (const video of videoList) {
+      if (!video.includes("m3u8")) continue;
+
+      const url = video.split("\n")[1];
+      const quality = video.split("RESOLUTION=")[1].split(",")[0].split("x")[1];
+
+      this.sources.push({
+        url: url,
+        quality: `${quality}p`,
+        isM3U8: true,
+      });
+    }
+
+    this.sources.push({
+      url: res.data.stream_data.file,
+      quality: "auto",
+      isM3U8: res.data.stream_data.file.includes(".m3u8"),
+    });
+
+    return this.sources;
+  }
+
+  // private addSources(source: any): void {
+  //   this.sources.push({
+  //     url: source.file,
+  //     isM3U8: source.file.includes(".m3u8"),
+  //   });
+  // }
+}
+
+export default StreamSB;
diff --git a/src/extractors/streamtape.ts b/src/extractors/streamtape.ts
new file mode 100644
index 0000000..c25add1
--- /dev/null
+++ b/src/extractors/streamtape.ts
@@ -0,0 +1,55 @@
+import axios from "axios";
+import { load, type CheerioAPI } from "cheerio";
+import type { Video } from "../types/extractor.js";
+
+class StreamTape {
+  // private serverName = "StreamTape";
+  private sources: Video[] = [];
+
+  /**
+   * Builds the direct video url from the "robotlink" script of the page.
+   *
+   * @param videoUrl url of the page to fetch
+   * @returns a list holding the single resolved source
+   * @throws when the page can't be fetched or holds no robotlink assignment
+   */
+  async extract(videoUrl: URL): Promise<Video[]> {
+    try {
+      // start from an empty list so that a reused instance doesn't return
+      // the sources of earlier calls as well
+      this.sources = [];
+
+      const { data } = await axios.get(videoUrl.href).catch(() => {
+        throw new Error("Video not found");
+      });
+
+      const $: CheerioAPI = load(data);
+
+      // the page builds the link from two quoted halves joined with "+"
+      const robotLink = $.html()?.match(/robotlink'\).innerHTML = (.*)'/)?.[1];
+      if (!robotLink) {
+        throw new Error("Video source not found");
+      }
+
+      let [fh, sh] = robotLink.split("+ ('");
+      if (sh === undefined) {
+        throw new Error("Video source not found");
+      }
+
+      sh = sh.substring(3);
+      fh = fh.replace(/\'/g, "");
+
+      const url = `https:${fh}${sh}`;
+
+      this.sources.push({
+        url: url,
+        isM3U8: url.includes(".m3u8"),
+      });
+
+      return this.sources;
+    } catch (err) {
+      throw new Error((err as Error).message);
+    }
+  }
+}
+export default StreamTape;