Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[YouTube] Adjust throttling function extraction to changes #1191

Merged
merged 8 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.schabi.newpipe.extractor.services.youtube;

import static org.schabi.newpipe.extractor.utils.Parser.matchGroup1MultiplePatterns;

import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.JavaScript;
import org.schabi.newpipe.extractor.utils.Parser;
Expand All @@ -20,13 +22,13 @@ final class YoutubeSignatureUtils {
*/
static final String DEOBFUSCATION_FUNCTION_NAME = "deobfuscate";

private static final String[] FUNCTION_REGEXES = {
"\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)",
"\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)",
private static final Pattern[] FUNCTION_REGEXES = {
// CHECKSTYLE:OFF
"(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)",
Pattern.compile("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)"),
Pattern.compile("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)"),
Pattern.compile("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)"),
Pattern.compile("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;")
// CHECKSTYLE:ON
"([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;"
};

private static final String STS_REGEX = "signatureTimestamp[=:](\\d+)";
Expand Down Expand Up @@ -104,19 +106,12 @@ static String getDeobfuscationCode(@Nonnull final String javaScriptPlayerCode)
@Nonnull
private static String getDeobfuscationFunctionName(@Nonnull final String javaScriptPlayerCode)
throws ParsingException {
Parser.RegexException exception = null;
for (final String regex : FUNCTION_REGEXES) {
try {
return Parser.matchGroup1(regex, javaScriptPlayerCode);
} catch (final Parser.RegexException e) {
if (exception == null) {
exception = e;
}
}
try {
return matchGroup1MultiplePatterns(FUNCTION_REGEXES, javaScriptPlayerCode);
} catch (final Parser.RegexException e) {
throw new ParsingException(
"Could not find deobfuscation function with any of the known patterns", e);
}

throw new ParsingException(
"Could not find deobfuscation function with any of the known patterns", exception);
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.schabi.newpipe.extractor.services.youtube;

import static org.schabi.newpipe.extractor.utils.Parser.matchMultiplePatterns;

import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.JavaScript;
import org.schabi.newpipe.extractor.utils.Parser;
Expand All @@ -18,10 +20,33 @@ final class YoutubeThrottlingParameterUtils {

private static final Pattern THROTTLING_PARAM_PATTERN = Pattern.compile("[&?]n=([^&]+)");

private static final Pattern DEOBFUSCATION_FUNCTION_NAME_PATTERN = Pattern.compile(
// CHECKSTYLE:OFF
"\\.get\\(\"n\"\\)\\)&&\\([a-zA-Z0-9$_]=([a-zA-Z0-9$_]+)(?:\\[(\\d+)])?\\([a-zA-Z0-9$_]\\)");
// CHECKSTYLE:ON
/**
 * Character class matching exactly one ASCII letter, digit, {@code $} or {@code _}.
 * Used below wherever the player code is expected to contain a one-character variable name.
 */
private static final String SINGLE_CHAR_VARIABLE_REGEX = "[a-zA-Z0-9$_]";

/**
 * One or more characters of {@link #SINGLE_CHAR_VARIABLE_REGEX}; used to capture the name of
 * the deobfuscation function.
 */
private static final String FUNCTION_NAME_REGEX = SINGLE_CHAR_VARIABLE_REGEX + "+";

/**
 * A decimal index between square brackets, such as {@code [0]}; the digits are captured in a
 * group.
 */
private static final String ARRAY_ACCESS_REGEX = "\\[(\\d+)]";

/**
 * Patterns used to locate the name of the throttling parameter deobfuscation function in the
 * JavaScript player code. They are handed to {@code matchMultiplePatterns}, which tries them
 * in array order and stops at the first one that matches.
 * <p>
 * The first regex matches this, where we want BDa:
 * <p>
 * (b=String.fromCharCode(110),c=a.get(b))&&(c=<strong>BDa</strong><strong>[0]</strong>(c)
 * <p>
 * The second regex matches the variant in which the parameter name is written as the string
 * literal {@code "n"}, i.e. text of the form {@code .get("n"))&&(b=BDa[0](b)}.
 * <p>
 * In both patterns group 1 is the function name and group 2 is the array index. Array access
 * is optional, but needs to be handled, since the actual function is inside the array; group 2
 * is {@code null} when no array access is present.
 */
// CHECKSTYLE:OFF
private static final Pattern[] DEOBFUSCATION_FUNCTION_NAME_REGEXES = {
// "(x=String.fromCharCode(110),y=z.get(x))&&(y=NAME[idx](y)" form
Pattern.compile("\\(" + SINGLE_CHAR_VARIABLE_REGEX + "=String\\.fromCharCode\\(110\\),"
+ SINGLE_CHAR_VARIABLE_REGEX + "=" + SINGLE_CHAR_VARIABLE_REGEX + "\\.get\\("
+ SINGLE_CHAR_VARIABLE_REGEX + "\\)\\)" + "&&\\(" + SINGLE_CHAR_VARIABLE_REGEX
+ "=(" + FUNCTION_NAME_REGEX + ")" + "(?:" + ARRAY_ACCESS_REGEX + ")?\\("
+ SINGLE_CHAR_VARIABLE_REGEX + "\\)"),
// '.get("n"))&&(y=NAME[idx](y)' form
Pattern.compile("\\.get\\(\"n\"\\)\\)&&\\(" + SINGLE_CHAR_VARIABLE_REGEX
+ "=(" + FUNCTION_NAME_REGEX + ")(?:" + ARRAY_ACCESS_REGEX + ")?\\("
+ SINGLE_CHAR_VARIABLE_REGEX + "\\)"),
};
// CHECKSTYLE:ON


// Escape the curly end brace to allow compatibility with Android's regex engine
// See https://stackoverflow.com/q/45074813
Expand All @@ -48,11 +73,13 @@ private YoutubeThrottlingParameterUtils() {
@Nonnull
static String getDeobfuscationFunctionName(@Nonnull final String javaScriptPlayerCode)
throws ParsingException {
final Matcher matcher = DEOBFUSCATION_FUNCTION_NAME_PATTERN.matcher(javaScriptPlayerCode);
if (!matcher.find()) {
throw new ParsingException("Failed to find deobfuscation function name pattern \""
+ DEOBFUSCATION_FUNCTION_NAME_PATTERN
+ "\" in the base JavaScript player code");
final Matcher matcher;
try {
matcher = matchMultiplePatterns(DEOBFUSCATION_FUNCTION_NAME_REGEXES,
javaScriptPlayerCode);
} catch (final Parser.RegexException e) {
throw new ParsingException("Could not find deobfuscation function with any of the "
+ "known patterns in the base JavaScript player code", e);
}

final String functionName = matcher.group(1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,37 @@ public static String matchGroup(@Nonnull final Pattern pat,
}
}

/**
 * Tries the given patterns against {@code input} via
 * {@link #matchMultiplePatterns(Pattern[], String)} and returns capture group 1 of the matcher
 * it yields.
 *
 * @param patterns the patterns to try
 * @param input    the text to search
 * @return the content of group 1 of the successful match
 * @throws RegexException if {@link #matchMultiplePatterns(Pattern[], String)} throws it
 */
public static String matchGroup1MultiplePatterns(final Pattern[] patterns, final String input)
        throws RegexException {
    final Matcher successfulMatch = matchMultiplePatterns(patterns, input);
    return successfulMatch.group(1);
}

public static Matcher matchMultiplePatterns(final Pattern[] patterns, final String input)
throws RegexException {
Parser.RegexException exception = null;
for (final Pattern pattern : patterns) {
final Matcher matcher = pattern.matcher(input);
if (matcher.find()) {
return matcher;
} else if (exception == null) {
// only pass input to exception message when it is not too long
if (input.length() > 1024) {
exception = new RegexException("Failed to find pattern \"" + pattern.pattern()
+ "\"");
} else {
XiangRongLin marked this conversation as resolved.
Show resolved Hide resolved
exception = new RegexException("Failed to find pattern \"" + pattern.pattern()
+ "\" inside of \"" + input + "\"");
}
}
}

if (exception == null) {
throw new RegexException("Empty patterns array passed to matchMultiplePatterns");
} else {
throw exception;
}
}

public static boolean isMatch(final String pattern, final String input) {
final Pattern pat = Pattern.compile(pattern);
final Matcher mat = pat.matcher(input);
Expand Down
Loading