Skip to content

Commit

Permalink
fix: bring back web player parser for age restricted videos
Browse files Browse the repository at this point in the history
  • Loading branch information
skick1234 committed Aug 7, 2024
1 parent da7de6b commit a1f57f5
Show file tree
Hide file tree
Showing 2 changed files with 332 additions and 23 deletions.
71 changes: 48 additions & 23 deletions lib/info.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const formatUtils = require('./format-utils');
const urlUtils = require('./url-utils');
const extras = require('./info-extras');
const Cache = require('./cache');
const { defaultAgent } = require('./agent');
const sig = require('./sig');


const BASE_URL = 'https://www.youtube.com/watch?v=';
Expand Down Expand Up @@ -217,30 +217,56 @@ const parseFormats = player_response => {
* @returns {Promise<Object>}
*/
exports.getInfo = async(id, options) => {
utils.applyIPv6Rotations(options);
utils.applyDefaultHeaders(options);
utils.applyDefaultAgent(options);
utils.applyOldLocalAddress(options);
const info = await exports.getBasicInfo(id, options);
const [iosPlayerResponse, androidPlayerResponse] = await Promise.all([
fetchIosJsonPlayer(id, options),
fetchAndroidJsonPlayer(id, options),
]);
info.formats = parseFormats(androidPlayerResponse).concat(parseFormats(iosPlayerResponse));
let funcs = [];
if (info.formats.length) {
funcs.push(info.formats);
}
if (androidPlayerResponse && androidPlayerResponse.streamingData) {
if (androidPlayerResponse.streamingData.dashManifestUrl) {
funcs.push(getDashManifest(androidPlayerResponse.streamingData.dashManifestUrl, options));
try {
if (info.videoDetails.age_restricted) throw Error('Cannot download age restricted videos with mobile clients');
const [iosPlayerResponse, androidPlayerResponse] = await Promise.all([
fetchIosJsonPlayer(id, options),
fetchAndroidJsonPlayer(id, options),
]);
info.formats = parseFormats(androidPlayerResponse).concat(parseFormats(iosPlayerResponse));
if (info.formats.length) {
funcs.push(info.formats);
}
if (androidPlayerResponse.streamingData.hlsManifestUrl) {
funcs.push(getM3U8(androidPlayerResponse.streamingData.hlsManifestUrl, options));
if (androidPlayerResponse && androidPlayerResponse.streamingData) {
if (androidPlayerResponse.streamingData.dashManifestUrl) {
funcs.push(getDashManifest(androidPlayerResponse.streamingData.dashManifestUrl, options));
}
if (androidPlayerResponse.streamingData.hlsManifestUrl) {
funcs.push(getM3U8(androidPlayerResponse.streamingData.hlsManifestUrl, options));
}
}
}
if (iosPlayerResponse && iosPlayerResponse.streamingData) {
if (iosPlayerResponse.streamingData.dashManifestUrl) {
funcs.push(getDashManifest(iosPlayerResponse.streamingData.dashManifestUrl, options));
if (iosPlayerResponse && iosPlayerResponse.streamingData) {
if (iosPlayerResponse.streamingData.dashManifestUrl) {
funcs.push(getDashManifest(iosPlayerResponse.streamingData.dashManifestUrl, options));
}
if (iosPlayerResponse.streamingData.hlsManifestUrl) {
funcs.push(getM3U8(iosPlayerResponse.streamingData.hlsManifestUrl, options));
}
}
if (iosPlayerResponse.streamingData.hlsManifestUrl) {
funcs.push(getM3U8(iosPlayerResponse.streamingData.hlsManifestUrl, options));
} catch (_) {
// Bring back web-scraping for now. TODO: tv client
info.html5player = info.html5player ||
getHTML5player(await getWatchHTMLPageBody(id, options)) || getHTML5player(await getEmbedPageBody(id, options));
if (!info.html5player) {
throw Error('Unable to find html5player file');
}
const html5player = new URL(info.html5player, BASE_URL).toString();
funcs.push(sig.decipherFormats(parseFormats(info.player_response), html5player, options));
if (info.player_response && info.player_response.streamingData) {
if (info.player_response.streamingData.dashManifestUrl) {
let url = info.player_response.streamingData.dashManifestUrl;
funcs.push(getDashManifest(url, options));
}
if (info.player_response.streamingData.hlsManifestUrl) {
let url = info.player_response.streamingData.hlsManifestUrl;
funcs.push(getM3U8(url, options));
}
}
}

Expand All @@ -249,7 +275,6 @@ exports.getInfo = async(id, options) => {
info.formats = info.formats.map(formatUtils.addFormatMeta);
info.formats.sort(formatUtils.sortFormats);


info.full = true;
return info;
};
Expand Down Expand Up @@ -288,7 +313,7 @@ const fetchIosJsonPlayer = async(videoId, options) => {
},
};

const { jar, dispatcher } = options.agent || defaultAgent;
const { jar, dispatcher } = options.agent;
const opts = {
requestOptions: {
method: 'POST',
Expand Down Expand Up @@ -353,7 +378,7 @@ const fetchAndroidJsonPlayer = async(videoId, options) => {
},
};

const { jar, dispatcher } = options.agent || defaultAgent;
const { jar, dispatcher } = options.agent;
const opts = {
requestOptions: {
method: 'POST',
Expand Down
284 changes: 284 additions & 0 deletions lib/sig.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
const querystring = require('querystring');
const Cache = require('./cache');
const utils = require('./utils');
const vm = require('vm');

// A shared cache to keep track of html5player js functions.
exports.cache = new Cache(1);

/**
* Extract signature deciphering and n parameter transform functions from html5player file.
*
* @param {string} html5playerfile
* @param {Object} options
* @returns {Promise<Array.<string>>}
*/
exports.getFunctions = (html5playerfile, options) => exports.cache.getOrSet(html5playerfile, async() => {
const body = await utils.request(html5playerfile, options);
const functions = exports.extractFunctions(body);
exports.cache.set(html5playerfile, functions);
return functions;
});

// NewPipeExtractor regexps
const DECIPHER_NAME_REGEXPS = [
'\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)',
'\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)',
// eslint-disable-next-line max-len
'(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*""\\s*\\)',
'([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(""\\)\\s*;',
];

// LavaPlayer regexps
const VARIABLE_PART = '[a-zA-Z_\\$][a-zA-Z_0-9]*';
const VARIABLE_PART_DEFINE = `\\"?${VARIABLE_PART}\\"?`;
const BEFORE_ACCESS = '(?:\\[\\"|\\.)';
const AFTER_ACCESS = '(?:\\"\\]|)';
const VARIABLE_PART_ACCESS = BEFORE_ACCESS + VARIABLE_PART + AFTER_ACCESS;
const REVERSE_PART = ':function\\(a\\)\\{(?:return )?a\\.reverse\\(\\)\\}';
const SLICE_PART = ':function\\(a,b\\)\\{return a\\.slice\\(b\\)\\}';
const SPLICE_PART = ':function\\(a,b\\)\\{a\\.splice\\(0,b\\)\\}';
const SWAP_PART = ':function\\(a,b\\)\\{' +
'var c=a\\[0\\];a\\[0\\]=a\\[b%a\\.length\\];a\\[b(?:%a.length|)\\]=c(?:;return a)?\\}';

const DECIPHER_REGEXP = `function(?: ${VARIABLE_PART})?\\(a\\)\\{` +
`a=a\\.split\\(""\\);\\s*` +
`((?:(?:a=)?${VARIABLE_PART}${VARIABLE_PART_ACCESS}\\(a,\\d+\\);)+)` +
`return a\\.join\\(""\\)` +
`\\}`;

const HELPER_REGEXP = `var (${VARIABLE_PART})=\\{((?:(?:${
VARIABLE_PART_DEFINE}${REVERSE_PART}|${
VARIABLE_PART_DEFINE}${SLICE_PART}|${
VARIABLE_PART_DEFINE}${SPLICE_PART}|${
VARIABLE_PART_DEFINE}${SWAP_PART}),?\\n?)+)\\};`;

const SCVR = '[a-zA-Z0-9$_]';
const FNR = `${SCVR}+`;
const AAR = '\\[(\\d+)]';
const N_TRANSFORM_NAME_REGEXPS = [
// NewPipeExtractor regexps
`${SCVR}+="nn"\\[\\+${
SCVR}+\\.${SCVR}+],${
SCVR}+=${SCVR
}+\\.get\\(${SCVR}+\\)\\)&&\\(${
SCVR}+=(${SCVR
}+)\\[(\\d+)]`,
`${SCVR}+="nn"\\[\\+${
SCVR}+\\.${SCVR}+],${
SCVR}+=${SCVR}+\\.get\\(${
SCVR}+\\)\\).+\\|\\|(${SCVR
}+)\\(""\\)`,
`\\(${SCVR}=String\\.fromCharCode\\(110\\),${
SCVR}=${SCVR}\\.get\\(${
SCVR}\\)\\)&&\\(${SCVR
}=(${FNR})(?:${AAR})?\\(${
SCVR}\\)`,
`\\.get\\("n"\\)\\)&&\\(${SCVR
}=(${FNR})(?:${AAR})?\\(${
SCVR}\\)`,
// Skick regexps
'(\\w+).length\\|\\|\\w+\\(""\\)',
'\\w+.length\\|\\|(\\w+)\\(""\\)',
];

// LavaPlayer regexps
const N_TRANSFORM_REGEXP = 'function\\(\\s*(\\w+)\\s*\\)\\s*\\{' +
'var\\s*(\\w+)=(?:\\1\\.split\\(""\\)|String\\.prototype\\.split\\.call\\(\\1,""\\)),' +
'\\s*(\\w+)=(\\[.*?]);\\s*\\3\\[\\d+]' +
'(.*?try)(\\{.*?})catch\\(\\s*(\\w+)\\s*\\)\\s*\\' +
'{\\s*return"enhanced_except_([A-z0-9-]+)"\\s*\\+\\s*\\1\\s*}' +
'\\s*return\\s*(\\2\\.join\\(""\\)|Array\\.prototype\\.join\\.call\\(\\2,""\\))};';

const DECIPHER_ARGUMENT = 'sig';
const N_ARGUMENT = 'ncode';

const matchRegex = (regex, str) => {
const match = str.match(new RegExp(regex, 's'));
if (!match) throw new Error(`Could not match ${regex}`);
return match;
};

const matchFirst = (regex, str) => matchRegex(regex, str)[0];

const matchGroup1 = (regex, str) => matchRegex(regex, str)[1];

const getFuncName = (body, regexps) => {
let fn;
for (const regex of regexps) {
try {
fn = matchGroup1(regex, body);
try {
fn = matchGroup1(`${fn.replace(/\$/g, '\\$')}=\\[([a-zA-Z0-9$\\[\\]]{2,})\\]`, body);
} catch (err) {
// Function name is not inside an array
}
break;
} catch (err) {
continue;
}
}
if (!fn || fn.includes('[')) throw Error();
return fn;
};

const DECIPHER_FUNC_NAME = 'DisTubeDecipherFunc';
const extractDecipherFunc = body => {
try {
const helperObject = matchFirst(HELPER_REGEXP, body);
const decipherFunc = matchFirst(DECIPHER_REGEXP, body);
const resultFunc = `var ${DECIPHER_FUNC_NAME}=${decipherFunc};`;
const callerFunc = `${DECIPHER_FUNC_NAME}(${DECIPHER_ARGUMENT});`;
return helperObject + resultFunc + callerFunc;
} catch (e) {
return null;
}
};

const extractDecipherWithName = body => {
try {
const decipherFuncName = getFuncName(body, DECIPHER_NAME_REGEXPS);
const funcPattern = `(${decipherFuncName.replace(/\$/g, '\\$')}=function\\([a-zA-Z0-9_]+\\)\\{.+?\\})`;
const decipherFunc = `var ${matchGroup1(funcPattern, body)};`;
const helperObjectName = matchGroup1(';([A-Za-z0-9_\\$]{2,})\\.\\w+\\(', decipherFunc);
const helperPattern = `(var ${helperObjectName.replace(/\$/g, '\\$')}=\\{[\\s\\S]+?\\}\\};)`;
const helperObject = matchGroup1(helperPattern, body);
const callerFunc = `${decipherFuncName}(${DECIPHER_ARGUMENT});`;
return helperObject + decipherFunc + callerFunc;
} catch (e) {
return null;
}
};

const getExtractFunctions = (extractFunctions, body) => {
for (const extractFunction of extractFunctions) {
try {
const func = extractFunction(body);
if (!func) continue;
return new vm.Script(func);
} catch (err) {
continue;
}
}
return null;
};

let decipherWarning = false;
// This is required function to get the stream url, but we can continue if user doesn't need stream url.
const extractDecipher = body => {
// Faster: extractDecipherWithName
const decipherFunc = getExtractFunctions([extractDecipherWithName, extractDecipherFunc], body);
if (!decipherFunc && !decipherWarning) {
console.warn('\x1b[33mWARNING:\x1B[0m Could not parse decipher function.\n' +
`Please report this issue with the "${
utils.saveDebugFile('base.js', body)
}" file on https://github.com/distubejs/ytdl-core/issues.\nStream URL will be missing.`);
decipherWarning = true;
}
return decipherFunc;
};

const N_TRANSFORM_FUNC_NAME = 'DisTubeNTransformFunc';
const extractNTransformFunc = body => {
try {
const nFunc = matchFirst(N_TRANSFORM_REGEXP, body);
const resultFunc = `var ${N_TRANSFORM_FUNC_NAME}=${nFunc}`;
const callerFunc = `${N_TRANSFORM_FUNC_NAME}(${N_ARGUMENT});`;
return resultFunc + callerFunc;
} catch (e) {
return null;
}
};

const extractNTransformWithName = body => {
try {
const nFuncName = getFuncName(body, N_TRANSFORM_NAME_REGEXPS);
const funcPattern = `(${
nFuncName.replace(/\$/g, '\\$')
// eslint-disable-next-line max-len
}=\\s*function([\\S\\s]*?\\}\\s*return (([\\w$]+?\\.join\\(""\\))|(Array\\.prototype\\.join\\.call\\([\\w$]+?,[\\n\\s]*(("")|(\\("",""\\)))\\)))\\s*\\}))`;
const nTransformFunc = `var ${matchGroup1(funcPattern, body)};`;
const callerFunc = `${nFuncName}(${N_ARGUMENT});`;
return nTransformFunc + callerFunc;
} catch (e) {
return null;
}
};

let nTransformWarning = false;
const extractNTransform = body => {
// Faster: extractNTransformFunc
const nTransformFunc = getExtractFunctions([extractNTransformFunc, extractNTransformWithName], body);
if (!nTransformFunc && !nTransformWarning) {
// This is optional, so we can continue if it's not found, but it will bottleneck the download.
console.warn('\x1b[33mWARNING:\x1B[0m Could not parse n transform function.\n' +
`Please report this issue with the "${
utils.saveDebugFile('base.js', body)
}" file on https://github.com/distubejs/ytdl-core/issues.`);
nTransformWarning = true;
}
return nTransformFunc;
};

/**
* Extracts the actions that should be taken to decipher a signature
* and transform the n parameter
*
* @param {string} body
* @returns {Array.<string>}
*/
exports.extractFunctions = body => [
extractDecipher(body),
extractNTransform(body),
];

/**
* Apply decipher and n-transform to individual format
*
* @param {Object} format
* @param {vm.Script} decipherScript
* @param {vm.Script} nTransformScript
*/
exports.setDownloadURL = (format, decipherScript, nTransformScript) => {
if (!decipherScript) return;
const decipher = url => {
const args = querystring.parse(url);
if (!args.s) return args.url;
const components = new URL(decodeURIComponent(args.url));
const context = {};
context[DECIPHER_ARGUMENT] = decodeURIComponent(args.s);
components.searchParams.set(args.sp || 'sig', decipherScript.runInNewContext(context));
return components.toString();
};
const nTransform = url => {
const components = new URL(decodeURIComponent(url));
const n = components.searchParams.get('n');
if (!n || !nTransformScript) return url;
const context = {};
context[N_ARGUMENT] = n;
components.searchParams.set('n', nTransformScript.runInNewContext(context));
return components.toString();
};
const cipher = !format.url;
const url = format.url || format.signatureCipher || format.cipher;
format.url = nTransform(cipher ? decipher(url) : url);
delete format.signatureCipher;
delete format.cipher;
};

/**
* Applies decipher and n parameter transforms to all format URL's.
*
* @param {Array.<Object>} formats
* @param {string} html5player
* @param {Object} options
*/
exports.decipherFormats = async(formats, html5player, options) => {
const decipheredFormats = {};
const [decipherScript, nTransformScript] = await exports.getFunctions(html5player, options);
formats.forEach(format => {
exports.setDownloadURL(format, decipherScript, nTransformScript);
decipheredFormats[format.url] = format;
});
return decipheredFormats;
};

0 comments on commit a1f57f5

Please sign in to comment.