diff --git a/include/result.h b/include/result.h index a19743d..b151f48 100644 --- a/include/result.h +++ b/include/result.h @@ -76,6 +76,9 @@ typedef struct { ERR_YOUTUBE_N_PARAM_FIND_IN_QUERY, ERR_YOUTUBE_N_PARAM_KVPAIR_ALLOC, ERR_YOUTUBE_N_PARAM_QUERY_APPEND_PLAINTEXT, + ERR_YOUTUBE_POT_PARAM_KVPAIR_ALLOC, + ERR_YOUTUBE_POT_PARAM_QUERY_APPEND_PLAINTEXT, + ERR_YOUTUBE_VISITOR_DATA_HEADER_ALLOC, ERR_YOUTUBE_STREAM_VISITOR_GET_URL, } err; int num; /* may hold errno, CURLcode, CURLUcode, etc */ diff --git a/include/youtube.h b/include/youtube.h index 90f87ad..44ac050 100644 --- a/include/youtube.h +++ b/include/youtube.h @@ -9,7 +9,8 @@ void youtube_global_cleanup(void); typedef struct youtube_stream *youtube_handle_t; -youtube_handle_t youtube_stream_init(void) WARN_UNUSED; +youtube_handle_t youtube_stream_init(const char *proof_of_origin, + const char *visitor_data) WARN_UNUSED; void youtube_stream_cleanup(youtube_handle_t h); struct youtube_setup_ops { diff --git a/main.c b/main.c index 379dda2..721e366 100644 --- a/main.c +++ b/main.c @@ -15,6 +15,7 @@ #include "youtube.h" #include +#include <getopt.h> /* for getopt_long() */ #include #include #include @@ -24,10 +25,6 @@ #define PCRE2_CODE_UNIT_WIDTH 8 #include <pcre2.h> -static const char ARG_HELP[] = "--help"; -static const char ARG_SANDBOX[] = "--try-sandbox"; -static const char ARG_QUALITY[] = "--quality="; - const char *__asan_default_options(void) __attribute__((used)); const char * @@ -194,26 +191,59 @@ print_url(const char *url) } while (0) int -main(int argc, const char *argv[]) +main(int argc, char *argv[]) { int fd __attribute__((cleanup(coverage_cleanup))) = coverage_open(); - if (argc < 2) { + struct quality q = {NULL, NULL}; + const char *proof_of_origin = NULL; + const char *visitor_data = NULL; + + int synonym = 0; + struct option lo[] = { + {"help", no_argument, &synonym, 'h'}, + {"try-sandbox", no_argument, &synonym, 't'}, + {"quality", required_argument, &synonym, 'q'}, + {"proof-of-origin", required_argument, 
&synonym, 'p'}, + {"visitor-data", required_argument, &synonym, 'v'}, + }; + + int opt = 0; + while ((opt = getopt_long(argc, argv, "htq:p:v:", lo, NULL)) != -1) { + switch (opt == 0 ? synonym : opt) { + case 'h': + return usage(argv[0], EX_OK); + case 't': + return try_sandbox(); + case 'q': + if (!parse_quality_choices(optarg, &q)) { + return EX_DATAERR; + } + break; + case 'p': + proof_of_origin = optarg; + break; + case 'v': + visitor_data = optarg; + break; + default: + return usage(argv[0], EX_USAGE); + } + } + + if (optind >= argc) { + fprintf(stderr, "Expected URL argument after options\n"); return usage(argv[0], EX_USAGE); } - int idx = 1; - const char *arg1 = argv[idx]; - struct quality q = {NULL, NULL}; - if (0 == strncmp(ARG_HELP, arg1, strlen(ARG_HELP))) { - return usage(argv[0], EX_OK); - } else if (0 == strncmp(ARG_SANDBOX, arg1, strlen(ARG_SANDBOX))) { - return try_sandbox(); - } else if (0 == strncmp(ARG_QUALITY, arg1, strlen(ARG_QUALITY))) { - if (!parse_quality_choices(arg1 + strlen(ARG_QUALITY), &q)) { - return EX_DATAERR; - } - ++idx; + if (proof_of_origin == NULL || strlen(proof_of_origin) == 0) { + fprintf(stderr, "Expected --proof-of-origin value\n"); + return usage(argv[0], EX_USAGE); + } + + if (visitor_data == NULL || strlen(visitor_data) == 0) { + fprintf(stderr, "Expected --visitor-data value\n"); + return usage(argv[0], EX_USAGE); } int rc = EX_OK; @@ -222,7 +252,7 @@ main(int argc, const char *argv[]) check_stderr(youtube_global_init(), EX_SOFTWARE); check_stderr(sandbox_only_io_inet_tmpfile(), EX_OSERR); - stream = youtube_stream_init(); + stream = youtube_stream_init(proof_of_origin, visitor_data); if (stream == NULL) { fprintf(stderr, "ERROR: Cannot allocate stream object\n"); rc = EX_OSERR; @@ -241,7 +271,7 @@ main(int argc, const char *argv[]) .after = NULL, }; - const char *url = argv[idx]; + const char *url = argv[optind]; check_stderr(youtube_stream_setup(stream, &sops, &q, url), EX_DATAERR); 
check_stderr(youtube_stream_visitor(stream, print_url), EX_DATAERR); diff --git a/src/js.c b/src/js.c index 2392e22..fccd9ba 100644 --- a/src/js.c +++ b/src/js.c @@ -142,6 +142,7 @@ parse_json(const char *json, size_t json_sz, struct parse_ops *ops) result_t make_innertube_json(const char *target_url, + const char *proof_of_origin, long long int timestamp, char **body) { @@ -159,13 +160,13 @@ make_innertube_json(const char *target_url, debug("Parsed ID: %.*s", (int)sz, id); json_auto_t *obj = NULL; - obj = json_pack("{s{s{ss,ss,ss,ss,si}},ss%,s{s{ss,si}},sb,sb}", + obj = json_pack("{s{s{ss,ss,ss,ss,si}},ss%,s{ss},s{s{ss,si}},sb,sb}", "context", "client", "clientName", - "WEB_CREATOR", + "WEB", "clientVersion", - "1.20240723.03.00", + "2.20240726.00.00", "hl", "en", "timeZone", @@ -175,6 +176,9 @@ make_innertube_json(const char *target_url, "videoId", id, sz, + "serviceIntegrityDimensions", + "poToken", + proof_of_origin, "playbackContext", "contentPlaybackContext", "html5Preference", diff --git a/src/js.h b/src/js.h index 865f92d..dbf48e1 100644 --- a/src/js.h +++ b/src/js.h @@ -20,6 +20,7 @@ result_t parse_json(const char *json_text, struct parse_ops *ops) WARN_UNUSED; result_t make_innertube_json(const char *target_url, + const char *proof_of_origin, long long int timestamp, char **body); diff --git a/src/result.c b/src/result.c index ed6b0c1..af8a821 100644 --- a/src/result.c +++ b/src/result.c @@ -313,12 +313,22 @@ result_to_str(result_t r) my_snprintf("No n-parameter in query string: %s", r.msg); break; case ERR_YOUTUBE_N_PARAM_KVPAIR_ALLOC: - s = "Cannot allocate kv-pair buffer"; + s = "Cannot allocate kv-pair buffer for plaintext n-parameter"; break; case ERR_YOUTUBE_N_PARAM_QUERY_APPEND_PLAINTEXT: my_snprintf("Cannot append plaintext n-parameter: %s", url_error(r)); break; + case ERR_YOUTUBE_POT_PARAM_KVPAIR_ALLOC: + s = "Cannot allocate kv-pair buffer for proof of origin"; + break; + case ERR_YOUTUBE_POT_PARAM_QUERY_APPEND_PLAINTEXT: + my_snprintf("Cannot 
append proof of origin parameter: %s", + url_error(r)); + break; + case ERR_YOUTUBE_VISITOR_DATA_HEADER_ALLOC: + s = "Cannot allocate asprintf buffer for visitor data header"; + break; case ERR_YOUTUBE_STREAM_VISITOR_GET_URL: my_snprintf("Cannot get URL as string: %s", url_error(r)); break; diff --git a/src/url.c b/src/url.c index 1379d5f..05099b2 100644 --- a/src/url.c +++ b/src/url.c @@ -71,6 +71,7 @@ url_global_init(void) NULL, NULL, NULL, + NULL, FD_DISCARD); info_if(err.err, "Error creating early URL worker threads"); @@ -126,10 +127,11 @@ static const char CONTENT_TYPE_JSON[] = "Content-Type: application/json"; static const char DEFAULT_HOST_STR[] = "www.youtube.com"; result_t -url_download(const char *url_str, /* may be NULL */ - const char *host_str, /* may be NULL */ - const char *path_str, /* may be NULL */ - const char *post_body, /* may be NULL */ +url_download(const char *url_str, /* may be NULL */ + const char *host_str, /* may be NULL */ + const char *path_str, /* may be NULL */ + const char *post_body, /* may be NULL */ + const char *post_header, /* may be NULL */ int fd) { CURLU *url = NULL; @@ -170,14 +172,21 @@ url_download(const char *url_str, /* may be NULL */ if (post_body) { headers = curl_slist_append(headers, CONTENT_TYPE_JSON); - res = curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - check_if_num(res, ERR_URL_DOWNLOAD_SET_OPT_HTTP_HEADER); res = curl_easy_setopt(curl, CURLOPT_POSTFIELDS, post_body); /* Note: libcurl does not copy */ check_if_num(res, ERR_URL_DOWNLOAD_SET_OPT_POST_BODY); } + if (post_header) { + headers = curl_slist_append(headers, post_header); + } + + if (headers) { + res = curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + check_if_num(res, ERR_URL_DOWNLOAD_SET_OPT_HTTP_HEADER); + } + res = CURL_EASY_PERFORM(curl, url_fragment_or_path_str, fd); check_if_num(res, ERR_URL_DOWNLOAD_PERFORM); diff --git a/src/url.h b/src/url.h index e741164..96e12be 100644 --- a/src/url.h +++ b/src/url.h @@ -12,6 +12,7 @@ result_t 
url_download(const char *url, const char *host, const char *path, const char *post_body, + const char *post_header, int fd) WARN_UNUSED; #endif diff --git a/src/youtube.c b/src/youtube.c index 118f46d..3753af8 100644 --- a/src/youtube.c +++ b/src/youtube.c @@ -16,6 +16,8 @@ struct youtube_stream { CURLU *url[2]; + char *proof_of_origin; + char *visitor_data; }; result_t @@ -30,23 +32,32 @@ youtube_global_cleanup(void) url_global_cleanup(); } +#define check_oom(p) \ + if ((p) == NULL) { \ + goto oom; \ + } + struct youtube_stream * -youtube_stream_init(void) +youtube_stream_init(const char *proof_of_origin, const char *visitor_data) { + assert(proof_of_origin && visitor_data); + struct youtube_stream *p = malloc(sizeof(*p)); - if (p == NULL) { - goto oom; - } + check_oom(p); - memset(p->url, 0, sizeof(p->url)); /* zero early, just in case */ + memset(p, 0, sizeof(*p)); /* zero early, just in case */ for (size_t i = 0; i < ARRAY_SIZE(p->url); ++i) { p->url[i] = curl_url(); /* may return NULL! 
*/ - if (p->url[i] == NULL) { - goto oom; - } + check_oom(p->url[i]); } + p->proof_of_origin = strdup(proof_of_origin); + check_oom(p->proof_of_origin); + + p->visitor_data = strdup(visitor_data); + check_oom(p->visitor_data); + return p; oom: @@ -54,6 +65,8 @@ youtube_stream_init(void) return NULL; } +#undef check_oom + void youtube_stream_cleanup(struct youtube_stream *p) { @@ -64,6 +77,8 @@ youtube_stream_cleanup(struct youtube_stream *p) curl_url_cleanup(p->url[i]); /* handles NULL gracefully */ p->url[i] = NULL; } + free(p->proof_of_origin); + free(p->visitor_data); free(p); } @@ -90,14 +105,31 @@ youtube_stream_visitor(struct youtube_stream *p, void (*visit)(const char *)) return RESULT_OK; } +static void +asprintf_free(char **strp) +{ + free(*strp); +} + static WARN_UNUSED result_t youtube_stream_set_one(struct youtube_stream *p, int idx, const char *val, size_t sz __attribute__((unused))) { - CURLUcode uc = curl_url_set(p->url[idx], CURLUPART_URL, val, 0); + CURLUcode uc = CURLUE_OK; + CURLU *u = p->url[idx]; + + uc = curl_url_set(u, CURLUPART_URL, val, 0); check_if_num(uc, ERR_JS_PARSE_JSON_CALLBACK_GOT_CIPHERTEXT_URL); + + char *kv __attribute__((cleanup(asprintf_free))) = NULL; + const int rc = asprintf(&kv, "pot=%s", p->proof_of_origin); + check_if(rc < 0, ERR_YOUTUBE_POT_PARAM_KVPAIR_ALLOC); + + uc = curl_url_set(u, CURLUPART_QUERY, kv, CURLU_APPENDQUERY); + check_if_num(uc, ERR_YOUTUBE_POT_PARAM_QUERY_APPEND_PLAINTEXT); + return RESULT_OK; } @@ -231,12 +263,6 @@ pop_n_param_all(struct youtube_stream *p, char **results, size_t capacity) return RESULT_OK; } -static void -asprintf_free(char **strp) -{ - free(*strp); -} - static WARN_UNUSED result_t append_n_param(const char *plaintext, size_t sz, size_t pos, void *userdata) { @@ -259,13 +285,14 @@ download_and_mmap_tmpfd(const char *url, const char *host, const char *path, const char *post_body, + const char *post_header, int fd, void **addr, unsigned int *sz) { assert(fd >= 0); - check(url_download(url, 
host, path, post_body, fd)); + check(url_download(url, host, path, post_body, post_header, fd)); check(tmpmap(fd, addr, sz)); debug("Downloaded %s to fd=%d", url ? url : path, fd); @@ -275,6 +302,15 @@ download_and_mmap_tmpfd(const char *url, static const char INNERTUBE_URI[] = "https://www.youtube.com/youtubei/v1/player"; +static WARN_UNUSED result_t +make_http_header_visitor_id(const char *visitor_data, char **strp) +{ + const int rc = asprintf(strp, "X-Goog-Visitor-Id: %s", visitor_data); + check_if(rc < 0, ERR_YOUTUBE_VISITOR_DATA_HEADER_ALLOC); + debug("Formatted InnerTube header: %s", *strp); + return RESULT_OK; +} + struct downloaded { const char *description; /* does not own */ int fd; @@ -352,6 +388,7 @@ youtube_stream_setup(struct youtube_stream *p, NULL, NULL, NULL, + NULL, html.fd, &html.buf, &html.sz)); @@ -372,6 +409,7 @@ youtube_stream_setup(struct youtube_stream *p, "www.youtube.com", null_terminated_basejs, NULL, + NULL, js.fd, &js.buf, &js.sz)); @@ -380,11 +418,19 @@ youtube_stream_setup(struct youtube_stream *p, check(find_js_timestamp(js.buf, js.sz, &timestamp)); char *innertube_post __attribute__((cleanup(json_dump_free))) = NULL; - check(make_innertube_json(target, timestamp, &innertube_post)); + check(make_innertube_json(target, + p->proof_of_origin, + timestamp, + &innertube_post)); + + char *innertube_header __attribute__((cleanup(asprintf_free))) = NULL; + check(make_http_header_visitor_id(p->visitor_data, &innertube_header)); + check(download_and_mmap_tmpfd(INNERTUBE_URI, NULL, NULL, innertube_post, + innertube_header, json.fd, &json.buf, &json.sz)); diff --git a/tests/result.c b/tests/result.c index 7c684d8..954edb2 100644 --- a/tests/result.c +++ b/tests/result.c @@ -108,6 +108,10 @@ print_to_str_each_enum_value(void) ASSERT_IN(make(ERR_YOUTUBE_N_PARAM_KVPAIR_ALLOC), CANNOT_ALLOC); ASSERT_IN(make_n(ERR_YOUTUBE_N_PARAM_QUERY_APPEND_PLAINTEXT), "Cannot append"); + ASSERT_IN(make(ERR_YOUTUBE_POT_PARAM_KVPAIR_ALLOC), CANNOT_ALLOC); + 
ASSERT_IN(make_n(ERR_YOUTUBE_POT_PARAM_QUERY_APPEND_PLAINTEXT), + "Cannot append"); + ASSERT_IN(make(ERR_YOUTUBE_VISITOR_DATA_HEADER_ALLOC), CANNOT_ALLOC); ASSERT_IN(make_n(ERR_YOUTUBE_STREAM_VISITOR_GET_URL), CANNOT_GET); PASS(); } diff --git a/tests/youtube.c b/tests/youtube.c index eba7c2a..e6a88ab 100644 --- a/tests/youtube.c +++ b/tests/youtube.c @@ -112,6 +112,8 @@ global_setup(void) PASS(); } +#define youtube_stream_init() youtube_stream_init("POT", "VISITOR_DATA") + TEST stream_setup_with_redirected_network_io(const char *(*custom_fn)(const char *), const char *expected_audio_url, @@ -173,8 +175,8 @@ SUITE(stream_setup_simple) RUN_TEST(global_setup); RUN_TESTp(stream_setup_with_redirected_network_io, NULL, - "http://a.test/?n=AAA", - "http://v.test/?n=VVV"); + "http://a.test/?pot=POT&n=AAA", + "http://v.test/?pot=POT&n=VVV"); RUN_TEST(stream_setup_with_null_ops); } @@ -307,16 +309,16 @@ SUITE(stream_setup_n_param_positions) RUN_TEST(global_setup); RUN_TESTp(stream_setup_with_redirected_network_io, test_request_n_param_pos_middle, - "http://a.test/?first=foo&last=bar&n=AAA", - "http://v.test/?first=foo&last=bar&n=VVV"); + "http://a.test/?first=foo&last=bar&pot=POT&n=AAA", + "http://v.test/?first=foo&last=bar&pot=POT&n=VVV"); RUN_TESTp(stream_setup_with_redirected_network_io, test_request_n_param_pos_first, - "http://a.test/?second=foo&third=bar&n=AAA", - "http://v.test/?second=foo&third=bar&n=VVV"); + "http://a.test/?second=foo&third=bar&pot=POT&n=AAA", + "http://v.test/?second=foo&third=bar&pot=POT&n=VVV"); RUN_TESTp(stream_setup_with_redirected_network_io, test_request_n_param_pos_last, - "http://a.test/?first=foo&second=bar&n=AAA", - "http://v.test/?first=foo&second=bar&n=VVV"); + "http://a.test/?first=foo&second=bar&pot=POT&n=AAA", + "http://v.test/?first=foo&second=bar&pot=POT&n=VVV"); RUN_TEST(stream_setup_edge_cases_n_param_missing); RUN_TEST(stream_setup_edge_cases_entire_url_missing); }