Skip to content

Commit

Permalink
[YouTube] Improve fix for ae8ba2c
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkf committed Jun 17, 2023
1 parent ae8ba2c commit 07af479
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
4 changes: 1 addition & 3 deletions youtube_dl/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1569,9 +1569,7 @@ def _parse_sig_js(self, jscode):
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')

# temporary (please) hack for player 6ed0d907 #32314
ah = 'var AH={LR:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c},QV:function(a){a.reverse()},pO:function(a,b){a.splice(0,b)}};'
jsi = JSInterpreter(ah + jscode)
jsi = JSInterpreter(jscode)

initial_function = jsi.extract_function(funcname)

Expand Down
21 changes: 12 additions & 9 deletions youtube_dl/jsinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -940,15 +940,18 @@ def interpret_iter(self, list_txt, local_vars, allow_recursion):
def extract_object(self, objname):
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
obj = {}
obj_m = re.search(
r'''(?xs)
(?:{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
}}\s*);
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
self.code)
fields = obj_m and obj_m.group('fields')
if fields is None:
fields = None
for obj_m in re.finditer(
r'''(?xs)
{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
}}\s*;
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
self.code):
fields = obj_m.group('fields')
if fields:
break
else:
raise self.Exception('Could not find object ' + objname)
# Currently, it only supports function definitions
fields_m = re.finditer(
Expand Down

2 comments on commit 07af479

@pukkandan
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this. Could you please explain? As I read it, either:

  • {0}\s*\.\s*{1} matches and since it has no fields, nothing happens - what's the point?
  • Or the rest matches and, since it always has fields, the first match is used

Which would mean this does the exact same thing as the simpler yt-dlp code. What am I missing?

@dirkf
Copy link
Contributor Author

@dirkf dirkf commented on 07af479 Jun 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was a minimal change from "not matching something preceded by `this.`".

As you rightly identify, the code implements "not matching something preceded by {_NAME_RE}.", which is how I proposed to change the existing match; this pattern is a way to achieve (?<!...) when the length of ... isn't fixed.

Thinking about your commit, I almost convinced myself that in this context the much simpler test is equivalent, or better, since we don't want to match a bare .{objname} either, I suppose. But I didn't fully understand the rationale for just skipping `this.` originally.

Please sign in to comment.