From 5b020e81cae7db93d67b9228e14fe94778966be6 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Tue, 3 Aug 2021 02:58:27 +0200 Subject: [PATCH] Youtube api improvements (#2277) * Put youtube API functions under the YoutubeAPI namespace * Implement the following endpoints: - `next` - `player` - `resolve_url` * Allow a ClientConfig to be passed to YoutubeAPI endpoint handlers. * Add constants for many new clients * Fix documentation of YoutubeAPI.browse(): Comments and search result aren't returned by the browse() endpoint but by the next() and search() endpoints, respectively. * Accept gzip compressed data, to help save on bandwidth * Add debug/trace logging * Other minor fixes --- src/invidious/channels/playlists.cr | 2 +- src/invidious/channels/videos.cr | 2 +- src/invidious/helpers/youtube_api.cr | 528 ++++++++++++++++++++++----- src/invidious/playlists.cr | 6 +- src/invidious/search.cr | 5 +- src/invidious/trending.cr | 3 +- 6 files changed, 439 insertions(+), 107 deletions(-) diff --git a/src/invidious/channels/playlists.cr b/src/invidious/channels/playlists.cr index 222ec2b18..393b055e6 100644 --- a/src/invidious/channels/playlists.cr +++ b/src/invidious/channels/playlists.cr @@ -1,6 +1,6 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by) if continuation - response_json = request_youtube_api_browse(continuation) + response_json = YoutubeAPI.browse(continuation) continuationItems = response_json["onResponseReceivedActions"]? 
.try &.[0]["appendContinuationItemsAction"]["continuationItems"] diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr index cc291e9e3..2c43bf0b1 100644 --- a/src/invidious/channels/videos.cr +++ b/src/invidious/channels/videos.cr @@ -61,7 +61,7 @@ def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = continuation = produce_channel_videos_continuation(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true) - return request_youtube_api_browse(continuation) + return YoutubeAPI.browse(continuation) end def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") diff --git a/src/invidious/helpers/youtube_api.cr b/src/invidious/helpers/youtube_api.cr index 734fddcda..a5b4b2d59 100644 --- a/src/invidious/helpers/youtube_api.cr +++ b/src/invidious/helpers/youtube_api.cr @@ -2,120 +2,450 @@ # This file contains youtube API wrappers # -# Hard-coded constants required by the API -HARDCODED_API_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" -HARDCODED_CLIENT_VERS = "2.20210330.08.00" +module YoutubeAPI + extend self -#################################################################### -# make_youtube_api_context(region) -# -# Return, as a Hash, the "context" data required to request the -# youtube API endpoints. -# -def make_youtube_api_context(region : String | Nil) : Hash - return { - "client" => { - "hl" => "en", - "gl" => region || "US", # Can't be empty! 
- "clientName" => "WEB", - "clientVersion" => HARDCODED_CLIENT_VERS, + # Enumerate used to select one of the clients supported by the API + enum ClientType + Web + WebEmbed + WebMobile + WebAgeBypass + Android + AndroidEmbed + AndroidAgeBypass + end + + # List of hard-coded values used by the different clients + HARDCODED_CLIENTS = { + ClientType::Web => { + name: "WEB", + version: "2.20210721.00.00", + api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", + screen: "WATCH_FULL_SCREEN", + }, + ClientType::WebEmbed => { + name: "WEB_EMBEDDED_PLAYER", # 56 + version: "1.20210721.1.0", + api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", + screen: "EMBED", + }, + ClientType::WebMobile => { + name: "MWEB", + version: "2.20210726.08.00", + api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", + screen: "", # None + }, + ClientType::WebAgeBypass => { + name: "WEB", + version: "2.20210721.00.00", + api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", + screen: "EMBED", + }, + ClientType::Android => { + name: "ANDROID", + version: "16.20", + api_key: "AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w", + screen: "", # ?? + }, + ClientType::AndroidEmbed => { + name: "ANDROID_EMBEDDED_PLAYER", # 55 + version: "16.20", + api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", + screen: "", # None? + }, + ClientType::AndroidAgeBypass => { + name: "ANDROID", # 3 + version: "16.20", + api_key: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", + screen: "EMBED", }, } -end -#################################################################### -# request_youtube_api_browse(continuation) -# request_youtube_api_browse(browse_id, params, region) -# -# Requests the youtubei/v1/browse endpoint with the required headers -# and POST data in order to get a JSON reply in english that can -# be easily parsed. -# -# The region can be provided, default is US. -# -# The requested data can either be: -# -# - A continuation token (ctoken). 
Depending on this token's -# contents, the returned data can be comments, playlist videos, -# search results, channel community tab, ... -# -# - A playlist ID (parameters MUST be an empty string) -# -def request_youtube_api_browse(continuation : String) - # JSON Request data, required by the API - data = { - "context" => make_youtube_api_context("US"), - "continuation" => continuation, - } + #################################################################### + # struct ClientConfig + # + # Data structure used to pass a client configuration to the different + # API endpoint handlers. + # + # Use case examples: + # + # ``` + # # Get Norwegian search results + # conf_1 = ClientConfig.new(region: "NO") + # YoutubeAPI::search("Kollektivet", params: "", client_config: conf_1) + # + # # Use the Android client to request video stream URLs + # conf_2 = ClientConfig.new(client_type: ClientType::Android) + # YoutubeAPI::player(video_id: "dQw4w9WgXcQ", params: "", client_config: conf_2) + # + # # Proxy request through Russian proxies + # conf_3 = ClientConfig.new(proxy_region: "RU") + # YoutubeAPI::next({videoId: "dQw4w9WgXcQ"}, client_config: conf_3) + # ``` + # + struct ClientConfig + # Type of client to emulate. + # See `enum ClientType` and `HARDCODED_CLIENTS`. + property client_type : ClientType - return _youtube_api_post_json("/youtubei/v1/browse", data) -end + # Region to provide to youtube, e.g. to alter search results + # (this is passed as the `gl` parameter). + property region : String | Nil -def request_youtube_api_browse(browse_id : String, params : String, region : String = "US") - # JSON Request data, required by the API - data = { - "browseId" => browse_id, - "context" => make_youtube_api_context(region), - } + # ISO code of country where the proxy is located. + # Used in case of geo-restricted videos. 
+ property proxy_region : String | Nil + + # Initialization function + def initialize( + *, + @client_type = ClientType::Web, + @region = "US", + @proxy_region = nil + ) + end + + # Getter functions that provides easy access to hardcoded clients + # parameters (name/version strings and related API key) + def name : String + HARDCODED_CLIENTS[@client_type][:name] + end + + # :ditto: + def version : String + HARDCODED_CLIENTS[@client_type][:version] + end + + # :ditto: + def api_key : String + HARDCODED_CLIENTS[@client_type][:api_key] + end - # Append the additionnal parameters if those were provided - # (this is required for channel info, playlist and community, e.g) - if params != "" - data["params"] = params + # :ditto: + def screen : String + HARDCODED_CLIENTS[@client_type][:screen] + end + + # Convert to string, for logging purposes + def to_s + return { + client_type: self.name, + region: @region, + proxy_region: @proxy_region, + }.to_s + end end - return _youtube_api_post_json("/youtubei/v1/browse", data) -end + # Default client config, used if nothing is passed + DEFAULT_CLIENT_CONFIG = ClientConfig.new -#################################################################### -# request_youtube_api_search(search_query, params, region) -# -# Requests the youtubei/v1/search endpoint with the required headers -# and POST data in order to get a JSON reply. As the search results -# vary depending on the region, a region code can be specified in -# order to get non-US results. -# -# The requested data is a search string, with some additional -# paramters, formatted as a base64 string. 
-# -def request_youtube_api_search(search_query : String, params : String, region = nil) - # JSON Request data, required by the API - data = { - "query" => search_query, - "context" => make_youtube_api_context(region), - "params" => params, - } + #################################################################### + # make_context(client_config) + # + # Return, as a Hash, the "context" data required to request the + # youtube API endpoints. + # + private def make_context(client_config : ClientConfig | Nil) : Hash + # Use the default client config if nil is passed + client_config ||= DEFAULT_CLIENT_CONFIG - return _youtube_api_post_json("/youtubei/v1/search", data) -end + client_context = { + "client" => { + "hl" => "en", + "gl" => client_config.region || "US", # Can't be empty! + "clientName" => client_config.name, + "clientVersion" => client_config.version, + "thirdParty" => { + "embedUrl" => "", # Placeholder + }, + }, + } -#################################################################### -# _youtube_api_post_json(endpoint, data) -# -# Internal function that does the actual request to youtube servers -# and handles errors. -# -# The requested data is an endpoint (URL without the domain part) -# and the data as a Hash object. -# -def _youtube_api_post_json(endpoint, data) - # Send the POST request and parse result - response = YT_POOL.client &.post( - "#{endpoint}?key=#{HARDCODED_API_KEY}", - headers: HTTP::Headers{"content-type" => "application/json; charset=UTF-8"}, - body: data.to_json + # Add some more context if it exists in the client definitions + if !client_config.screen.empty? + client_context["client"]["clientScreen"] = client_config.screen + end + + # Replacing/removing the placeholder is easier than trying to + # merge two different Hash structures. 
+ if client_config.screen == "EMBED" + client_context["client"]["thirdParty"] = { + "embedUrl" => "https://www.youtube.com/embed/dQw4w9WgXcQ", + } + else + client_context["client"].delete("thirdParty") + end + + return client_context + end + + #################################################################### + # browse(continuation, client_config?) + # browse(browse_id, params, client_config?) + # + # Requests the youtubei/v1/browse endpoint with the required headers + # and POST data in order to get a JSON reply in English that can + # be easily parsed. + # + # Both forms can take an optional ClientConfig parameter (see + # `struct ClientConfig` above for more details). + # + # The requested data can either be: + # + # - A continuation token (ctoken). Depending on this token's + # contents, the returned data can be playlist videos, channel + # community tab content, channel info, ... + # + # - A playlist ID (parameters MUST be an empty string) + # + def browse(continuation : String, client_config : ClientConfig | Nil = nil) + # JSON Request data, required by the API + data = { + "context" => self.make_context(client_config), + "continuation" => continuation, + } + + return self._post_json("/youtubei/v1/browse", data, client_config) + end + + # :ditto: + def browse( + browse_id : String, + *, # Force the following parameters to be passed by name + params : String, + client_config : ClientConfig | Nil = nil ) + # JSON Request data, required by the API + data = { + "browseId" => browse_id, + "context" => self.make_context(client_config), + } - initial_data = JSON.parse(response.body).as_h + # Append the additional parameters if those were provided + # (this is required for channel info, playlist and community, e.g.) + if params != "" + data["params"] = params + end - # Error handling - if initial_data.has_key?("error") - code = initial_data["error"]["code"] - message = initial_data["error"]["message"].to_s.sub(/(\\n)+\^$/, "") + return 
self._post_json("/youtubei/v1/browse", data, client_config) + end - raise InfoException.new("Could not extract JSON. Youtube API returned \ - error #{code} with message:
\"#{message}\"") + #################################################################### + # next(continuation, client_config?) + # next(data, client_config?) + # + # Requests the youtubei/v1/next endpoint with the required headers + # and POST data in order to get a JSON reply in english that can + # be easily parsed. + # + # Both forms can take an optional ClientConfig parameter (see + # `struct ClientConfig` above for more details). + # + # The requested data can be: + # + # - A continuation token (ctoken). Depending on this token's + # contents, the returned data can be videos comments, + # their replies, ... In this case, the string must be passed + # directly to the function. E.g: + # + # ``` + # YoutubeAPI::next("ABCDEFGH_abcdefgh==") + # ``` + # + # - Arbitrary parameters, in Hash form. See examples below for + # known examples of arbitrary data that can be passed to YouTube: + # + # ``` + # # Get the videos related to a specific video ID + # YoutubeAPI::next({"videoId" => "dQw4w9WgXcQ"}) + # + # # Get a playlist video's details + # YoutubeAPI::next({ + # "videoId" => "9bZkp7q19f0", + # "playlistId" => "PL_oFlvgqkrjUVQwiiE3F3k3voF4tjXeP0", + # }) + # ``` + # + def next(continuation : String, *, client_config : ClientConfig | Nil = nil) + # JSON Request data, required by the API + data = { + "context" => self.make_context(client_config), + "continuation" => continuation, + } + + return self._post_json("/youtubei/v1/next", data, client_config) + end + + # :ditto: + def next(data : Hash, *, client_config : ClientConfig | Nil = nil) + # JSON Request data, required by the API + data2 = data.merge({ + "context" => self.make_context(client_config), + }) + + return self._post_json("/youtubei/v1/next", data2, client_config) + end + + # Allow a NamedTuple to be passed, too. 
+ def next(data : NamedTuple, *, client_config : ClientConfig | Nil = nil) + return self.next(data.to_h, client_config: client_config) + end + + #################################################################### + # player(video_id, params, client_config?) + # + # Requests the youtubei/v1/player endpoint with the required headers + # and POST data in order to get a JSON reply. + # + # The requested data is a video ID (`v=` parameter), with some + # additional parameters, formatted as a base64 string. + # + # An optional ClientConfig parameter can be passed, too (see + # `struct ClientConfig` above for more details). + # + def player( + video_id : String, + *, # Force the following parameters to be passed by name + params : String, + client_config : ClientConfig | Nil = nil + ) + # JSON Request data, required by the API + data = { + "videoId" => video_id, + "context" => self.make_context(client_config), + } + + # Append the additional parameters if those were provided + if params != "" + data["params"] = params + end + + return self._post_json("/youtubei/v1/player", data, client_config) + end + + #################################################################### + # resolve_url(url) + # + # Requests the youtubei/v1/navigation/resolve_url endpoint with the + # required headers and POST data in order to get a JSON reply. + # + # Output: + # + # ``` + # # Valid channel "brand URL" gives the related UCID and browse ID + # channel_a = YoutubeAPI.resolve_url("https://youtube.com/c/google") + # channel_a # => { + # "endpoint": { + # "browseEndpoint": { + # "params": "EgC4AQA%3D", + # "browseId":"UCK8sQmJBp8GCxrOtXWBpyEA" + # }, + # ... 
+ # } + # } + # + # An invalid URL raises an InfoException + # channel_b = YoutubeAPI.resolve_url("https://youtube.com/c/invalid") + # ``` + # + def resolve_url(url : String) + data = { + "context" => self.make_context(nil), + "url" => url, + } + + return self._post_json("/youtubei/v1/navigation/resolve_url", data, nil) end - return initial_data -end + #################################################################### + # search(search_query, params, client_config?) + # + # Requests the youtubei/v1/search endpoint with the required headers + # and POST data in order to get a JSON reply. As the search results + # vary depending on the region, a region code can be specified in + # order to get non-US results. + # + # The requested data is a search string, with some additional + # parameters, formatted as a base64 string. + # + # An optional ClientConfig parameter can be passed, too (see + # `struct ClientConfig` above for more details). + # + def search( + search_query : String, + params : String, + client_config : ClientConfig | Nil = nil + ) + # JSON Request data, required by the API + data = { + "query" => search_query, + "context" => self.make_context(client_config), + "params" => params, + } + + return self._post_json("/youtubei/v1/search", data, client_config) + end + + #################################################################### + # _post_json(endpoint, data, client_config?) + # + # Internal function that does the actual request to youtube servers + # and handles errors. + # + # The requested data is an endpoint (URL without the domain part) + # and the data as a Hash object. 
+ # + def _post_json( + endpoint : String, + data : Hash, + client_config : ClientConfig | Nil + ) : Hash(String, JSON::Any) + # Use the default client config if nil is passed + client_config ||= DEFAULT_CLIENT_CONFIG + + # Query parameters + url = "#{endpoint}?key=#{client_config.api_key}" + + headers = HTTP::Headers{ + "Content-Type" => "application/json; charset=UTF-8", + "Accept-Encoding" => "gzip", + } + + # Logging + LOGGER.debug("YoutubeAPI: Using endpoint: \"#{endpoint}\"") + LOGGER.trace("YoutubeAPI: ClientConfig: #{client_config.to_s}") + LOGGER.trace("YoutubeAPI: POST data: #{data.to_s}") + + # Send the POST request + if client_config.proxy_region + response = YT_POOL.client( + client_config.proxy_region, + &.post(url, headers: headers, body: data.to_json) + ) + else + response = YT_POOL.client &.post( + url, headers: headers, body: data.to_json + ) + end + + # Convert result to Hash + initial_data = JSON.parse(response.body).as_h + + # Error handling + if initial_data.has_key?("error") + code = initial_data["error"]["code"] + message = initial_data["error"]["message"].to_s.sub(/(\\n)+\^$/, "") + + # Logging + LOGGER.error("YoutubeAPI: Got error #{code} when requesting #{endpoint}") + LOGGER.error("YoutubeAPI: #{message}") + LOGGER.info("YoutubeAPI: POST data was: #{data.to_s}") + + raise InfoException.new("Could not extract JSON. Youtube API returned \ + error #{code} with message:
\"#{message}\"") + end + + return initial_data + end +end # End of module diff --git a/src/invidious/playlists.cr b/src/invidious/playlists.cr index fe7f82f35..f56cc2ea9 100644 --- a/src/invidious/playlists.cr +++ b/src/invidious/playlists.cr @@ -361,7 +361,7 @@ def fetch_playlist(plid, locale) plid = "UU#{plid.lchop("UC")}" end - initial_data = request_youtube_api_browse("VL" + plid, params: "") + initial_data = YoutubeAPI.browse("VL" + plid, params: "") playlist_sidebar_renderer = initial_data["sidebar"]?.try &.["playlistSidebarRenderer"]?.try &.["items"]? raise InfoException.new("Could not extract playlistSidebarRenderer.") if !playlist_sidebar_renderer @@ -442,9 +442,9 @@ def get_playlist_videos(db, playlist, offset, locale = nil, continuation = nil) offset = (offset / 100).to_i64 * 100_i64 ctoken = produce_playlist_continuation(playlist.id, offset) - initial_data = request_youtube_api_browse(ctoken) + initial_data = YoutubeAPI.browse(ctoken) else - initial_data = request_youtube_api_browse("VL" + playlist.id, params: "") + initial_data = YoutubeAPI.browse("VL" + playlist.id, params: "") end return extract_playlist_videos(initial_data) diff --git a/src/invidious/search.cr b/src/invidious/search.cr index 662173a04..882d21ad8 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -244,7 +244,7 @@ def channel_search(query, page, channel) end continuation = produce_channel_search_continuation(ucid, query, page) - response_json = request_youtube_api_browse(continuation) + response_json = YoutubeAPI.browse(continuation) continuationItems = response_json["onResponseReceivedActions"]? .try &.[0]["appendContinuationItemsAction"]["continuationItems"] @@ -263,7 +263,8 @@ end def search(query, search_params = produce_search_params(content_type: "all"), region = nil) return 0, [] of SearchItem if query.empty? 
- initial_data = request_youtube_api_search(query, search_params, region) + client_config = YoutubeAPI::ClientConfig.new(region: region) + initial_data = YoutubeAPI.search(query, search_params, client_config: client_config) items = extract_items(initial_data) return items.size, items diff --git a/src/invidious/trending.cr b/src/invidious/trending.cr index 2ab1e7ba5..25bab4d20 100644 --- a/src/invidious/trending.cr +++ b/src/invidious/trending.cr @@ -14,7 +14,8 @@ def fetch_trending(trending_type, region, locale) params = "" end - initial_data = request_youtube_api_browse("FEtrending", params: params, region: region) + client_config = YoutubeAPI::ClientConfig.new(region: region) + initial_data = YoutubeAPI.browse("FEtrending", params: params, client_config: client_config) trending = extract_videos(initial_data) return {trending, plid}