From 26f1b4e7dccc00e8e8cee3c7db736e2e623687c0 Mon Sep 17 00:00:00 2001 From: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com> Date: Sat, 23 Jan 2021 18:17:35 +0100 Subject: [PATCH 1/8] Support SoundCloud HLS by using a workaround This commit tries to support SoundCloud HLS streams by parsing M3U manifests, getting the last segment URL (in order to get the track length) and requesting a segment URL whose range equals the track's duration, so that the result is a single URL. --- .../extractors/SoundcloudStreamExtractor.java | 72 ++++++++++++++----- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java index 953fa684a6..b6f2b8cfeb 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java @@ -10,7 +10,6 @@ import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; -import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.GeographicRestrictionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; @@ -29,7 +28,10 @@ import java.util.Collections; import java.util.List; import java.util.Locale; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; import static org.schabi.newpipe.extractor.utils.Utils.*; public class SoundcloudStreamExtractor extends StreamExtractor { @@ -182,7 +184,7 @@ public String getHlsUrl() { @Override 
public List getAudioStreams() throws IOException, ExtractionException { - List audioStreams = new ArrayList<>(); + final List audioStreams = new ArrayList<>(); final Downloader dl = NewPipe.getDownloader(); // Streams can be streamable and downloadable - or explicitly not. @@ -193,43 +195,77 @@ public List getAudioStreams() throws IOException, ExtractionExcepti try { final JsonArray transcodings = track.getObject("media").getArray("transcodings"); - // get information about what stream formats are available - for (Object transcoding : transcodings) { - + // Get information about what stream formats are available + for (final Object transcoding : transcodings) { final JsonObject t = (JsonObject) transcoding; String url = t.getString("url"); + final String mediaUrl; + final MediaFormat mediaFormat; + final int bitrate; if (!isNullOrEmpty(url)) { + if (t.getString("preset").contains("mp3")) { + mediaFormat = MediaFormat.MP3; + bitrate = 128; + } else if (t.getString("preset").contains("opus")) { + mediaFormat = MediaFormat.OPUS; + bitrate = 64; + } else { + continue; + } + + // TODO: move this to a separate method to generate valid urls when needed (e.g. resuming a paused stream) - // We can only play the mp3 format, but not handle m3u playlists / streams. - // what about Opus? - if (t.getString("preset").contains("mp3") - && t.getObject("format").getString("protocol").equals("progressive")) { + if (t.getObject("format").getString("protocol").equals("progressive")) { // This url points to the endpoint which generates a unique and short living url to the stream. - // TODO: move this to a separate method to generate valid urls when needed (e.g. resuming a paused stream) url += "?client_id=" + SoundcloudParsingHelper.clientId(); final String res = dl.get(url).responseBody(); try { JsonObject mp3UrlObject = JsonParser.object().from(res); // Links in this file are also only valid for a short period. 
- audioStreams.add(new AudioStream(mp3UrlObject.getString("url"), - MediaFormat.MP3, 128)); - } catch (JsonParserException e) { + mediaUrl = mp3UrlObject.getString("url"); + } catch (final JsonParserException e) { + throw new ParsingException("Could not parse streamable url", e); + } + } else if (t.getObject("format").getString("protocol").equals("hls")) { + // This url points to the endpoint which generates a unique and short living url to the stream. + url += "?client_id=" + SoundcloudParsingHelper.clientId(); + final String res = dl.get(url).responseBody(); + + try { + final JsonObject mp3HlsUrlObject = JsonParser.object().from(res); + // Links in this file are also only valid for a short period. + + // Parsing the HLS manifest to get a single file by requesting a range equal to 0-track_length + final String hlsManifestResponse = dl.get(mp3HlsUrlObject.getString("url")).responseBody(); + final List hlsRangesList = new ArrayList<>(); + final Matcher regex = Pattern.compile("((https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?+-=\\\\.&]*)") + .matcher(hlsManifestResponse); + + while (regex.find()) { + hlsRangesList.add(hlsManifestResponse.substring(regex.start(0), regex.end(0))); + } + + final String hlsLastRangeUrl = hlsRangesList.get(hlsRangesList.size() - 1); + final String[] hlsLastRangeUrlArray = hlsLastRangeUrl.split("/"); + + mediaUrl = HTTPS + hlsLastRangeUrlArray[2] + "/media/0/" + hlsLastRangeUrlArray[5] + "/" + hlsLastRangeUrlArray[6]; + } catch (final JsonParserException e) { throw new ParsingException("Could not parse streamable url", e); } + } else { + continue; } + + audioStreams.add(new AudioStream(mediaUrl, mediaFormat, bitrate)); } } - } catch (NullPointerException e) { + } catch (final NullPointerException e) { throw new ExtractionException("Could not get SoundCloud's track audio url", e); } - if (audioStreams.isEmpty()) { - throw new ContentNotSupportedException("HLS audio streams are not yet supported"); - } - return audioStreams; } From 
cbacd3c0a5fda97c0496ceda0ed223bc3d5e074a Mon Sep 17 00:00:00 2001 From: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com> Date: Mon, 25 Jan 2021 20:42:08 +0100 Subject: [PATCH 2/8] Add a check to avoid showing the MP3 128kbps stream twice and catch IOException when fetching the HLS manifest If a progressive stream is present in the transcodings, it is unnecessary to show an MP3 128kbps stream twice, so in this case the MP3 HLS stream will not be added to audioStreams; otherwise it will. This commit also catches fetching errors during HLS manifest parsing and doesn't add the corresponding stream if an error occurs. --- .../extractors/SoundcloudStreamExtractor.java | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java index b6f2b8cfeb..22873c6c2e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java @@ -195,6 +195,19 @@ public List getAudioStreams() throws IOException, ExtractionExcepti try { final JsonArray transcodings = track.getObject("media").getArray("transcodings"); + // Iterate a first time to see if there is a progressive MP3 stream available. + // If yes, the MP3 HLS stream will be not added to audioStreams. 
+ + boolean mp3ProgressiveStreamInTranscodings = false; + + for (final Object transcoding : transcodings) { + final JsonObject t = (JsonObject) transcoding; + if (t.getString("preset").contains("mp3") && + t.getObject("format").getString("protocol").equals("progressive")) { + mp3ProgressiveStreamInTranscodings = true; + } + } + // Get information about what stream formats are available for (final Object transcoding : transcodings) { final JsonObject t = (JsonObject) transcoding; @@ -205,6 +218,13 @@ public List getAudioStreams() throws IOException, ExtractionExcepti if (!isNullOrEmpty(url)) { if (t.getString("preset").contains("mp3")) { + // Don't add the MP3 HLS stream if there is a progressive stream present + // because the two have the same bitrate + if (t.getObject("format").getString("protocol").equals("hls") && + mp3ProgressiveStreamInTranscodings) { + continue; + } + mediaFormat = MediaFormat.MP3; bitrate = 128; } else if (t.getString("preset").contains("opus")) { @@ -222,13 +242,14 @@ public List getAudioStreams() throws IOException, ExtractionExcepti final String res = dl.get(url).responseBody(); try { - JsonObject mp3UrlObject = JsonParser.object().from(res); + final JsonObject mp3UrlObject = JsonParser.object().from(res); // Links in this file are also only valid for a short period. mediaUrl = mp3UrlObject.getString("url"); } catch (final JsonParserException e) { throw new ParsingException("Could not parse streamable url", e); } } else if (t.getObject("format").getString("protocol").equals("hls")) { + // This url points to the endpoint which generates a unique and short living url to the stream. url += "?client_id=" + SoundcloudParsingHelper.clientId(); final String res = dl.get(url).responseBody(); @@ -236,9 +257,13 @@ public List getAudioStreams() throws IOException, ExtractionExcepti try { final JsonObject mp3HlsUrlObject = JsonParser.object().from(res); // Links in this file are also only valid for a short period. 
- // Parsing the HLS manifest to get a single file by requesting a range equal to 0-track_length - final String hlsManifestResponse = dl.get(mp3HlsUrlObject.getString("url")).responseBody(); + final String hlsManifestResponse; + try { + hlsManifestResponse = dl.get(mp3HlsUrlObject.getString("url")).responseBody(); + } catch (final IOException e) { + continue; + } final List hlsRangesList = new ArrayList<>(); final Matcher regex = Pattern.compile("((https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?+-=\\\\.&]*)") .matcher(hlsManifestResponse); From 3bd08a28802a7679e5bf468b969db5410c8e578a Mon Sep 17 00:00:00 2001 From: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com> Date: Sat, 20 Feb 2021 15:59:05 +0100 Subject: [PATCH 3/8] Address requested changes and use final where possible in SoundcloudStreamExtractor This commit moved the HLS parsing task to a separate method, made small performance improvements and used final where possible in the SoundcloudStreamExtractor file. --- .../extractors/SoundcloudStreamExtractor.java | 81 ++++++++++++------- 1 file changed, 51 insertions(+), 30 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java index 22873c6c2e..5dbada6e25 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java @@ -13,6 +13,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.GeographicRestrictionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import 
org.schabi.newpipe.extractor.exceptions.SoundCloudGoPlusContentException; import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.localization.DateWrapper; @@ -45,7 +46,7 @@ public SoundcloudStreamExtractor(StreamingService service, LinkHandler linkHandl public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { track = SoundcloudParsingHelper.resolveFor(downloader, getUrl()); - String policy = track.getString("policy", EMPTY_STRING); + final String policy = track.getString("policy", EMPTY_STRING); if (!policy.equals("ALLOW") && !policy.equals("MONETIZE")) { if (policy.equals("SNIP")) { throw new SoundCloudGoPlusContentException(); @@ -194,10 +195,8 @@ public List getAudioStreams() throws IOException, ExtractionExcepti try { final JsonArray transcodings = track.getObject("media").getArray("transcodings"); - // Iterate a first time to see if there is a progressive MP3 stream available. // If yes, the MP3 HLS stream will be not added to audioStreams. 
- boolean mp3ProgressiveStreamInTranscodings = false; for (final Object transcoding : transcodings) { @@ -205,38 +204,40 @@ public List getAudioStreams() throws IOException, ExtractionExcepti if (t.getString("preset").contains("mp3") && t.getObject("format").getString("protocol").equals("progressive")) { mp3ProgressiveStreamInTranscodings = true; + break; } } // Get information about what stream formats are available for (final Object transcoding : transcodings) { final JsonObject t = (JsonObject) transcoding; - String url = t.getString("url"); final String mediaUrl; + final String preset = t.getString("preset"); + final String protocol = t.getObject("format").getString("protocol"); + String url = t.getString("url"); final MediaFormat mediaFormat; final int bitrate; if (!isNullOrEmpty(url)) { - if (t.getString("preset").contains("mp3")) { + if (preset.contains("mp3")) { // Don't add the MP3 HLS stream if there is a progressive stream present // because the two have the same bitrate - if (t.getObject("format").getString("protocol").equals("hls") && - mp3ProgressiveStreamInTranscodings) { + if (mp3ProgressiveStreamInTranscodings && protocol.equals("hls")) { continue; } - mediaFormat = MediaFormat.MP3; bitrate = 128; - } else if (t.getString("preset").contains("opus")) { + } else if (preset.contains("opus")) { mediaFormat = MediaFormat.OPUS; bitrate = 64; } else { + // Unknown format continue; } // TODO: move this to a separate method to generate valid urls when needed (e.g. resuming a paused stream) - if (t.getObject("format").getString("protocol").equals("progressive")) { + if (protocol.equals("progressive")) { // This url points to the endpoint which generates a unique and short living url to the stream. 
url += "?client_id=" + SoundcloudParsingHelper.clientId(); final String res = dl.get(url).responseBody(); @@ -248,8 +249,7 @@ public List getAudioStreams() throws IOException, ExtractionExcepti } catch (final JsonParserException e) { throw new ParsingException("Could not parse streamable url", e); } - } else if (t.getObject("format").getString("protocol").equals("hls")) { - + } else if (protocol.equals("hls")) { // This url points to the endpoint which generates a unique and short living url to the stream. url += "?client_id=" + SoundcloudParsingHelper.clientId(); final String res = dl.get(url).responseBody(); @@ -257,29 +257,17 @@ public List getAudioStreams() throws IOException, ExtractionExcepti try { final JsonObject mp3HlsUrlObject = JsonParser.object().from(res); // Links in this file are also only valid for a short period. - // Parsing the HLS manifest to get a single file by requesting a range equal to 0-track_length - final String hlsManifestResponse; try { - hlsManifestResponse = dl.get(mp3HlsUrlObject.getString("url")).responseBody(); - } catch (final IOException e) { + mediaUrl = getSingleUrlFromHlsManifest(mp3HlsUrlObject.getString("url")); + } catch (final ParsingException e) { + // Something went during HLS manifest parsing, don't add this stream to audioStreams continue; } - final List hlsRangesList = new ArrayList<>(); - final Matcher regex = Pattern.compile("((https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?+-=\\\\.&]*)") - .matcher(hlsManifestResponse); - - while (regex.find()) { - hlsRangesList.add(hlsManifestResponse.substring(regex.start(0), regex.end(0))); - } - - final String hlsLastRangeUrl = hlsRangesList.get(hlsRangesList.size() - 1); - final String[] hlsLastRangeUrlArray = hlsLastRangeUrl.split("/"); - - mediaUrl = HTTPS + hlsLastRangeUrlArray[2] + "/media/0/" + hlsLastRangeUrlArray[5] + "/" + hlsLastRangeUrlArray[6]; } catch (final JsonParserException e) { throw new ParsingException("Could not parse streamable url", e); } } else { + // Unknown 
protocol continue; } @@ -294,10 +282,43 @@ public List getAudioStreams() throws IOException, ExtractionExcepti return audioStreams; } - private static String urlEncode(String value) { + private final static Pattern PATTERN_WEB_URLS_IN_HLS_MANIFESTS = Pattern.compile("((http?|https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?+-=\\\\.&]*)"); + + /** Parses a SoundCloud HLS manifest to get a single URL of HLS streams. + *

+ * This method downloads the provided manifest URL, find all web occurrences using a regex, get + * the last segment URL, changes its segment range to {@code 0/track-length} and return this string. + * @param hlsManifestUrl the URL of the manifest to be parsed + * @return a single URL that contains a range equal to the length of the track + */ + private static String getSingleUrlFromHlsManifest(final String hlsManifestUrl) throws ParsingException { + final Downloader dl = NewPipe.getDownloader(); + final String hlsManifestResponse; + + try { + hlsManifestResponse = dl.get(hlsManifestUrl).responseBody(); + } catch (final IOException | ReCaptchaException e) { + throw new ParsingException("Could not get SoundCloud HLS Manifest"); + } + + final List hlsRangesList = new ArrayList<>(); + final Matcher pattern_matches = PATTERN_WEB_URLS_IN_HLS_MANIFESTS.matcher(hlsManifestResponse); + + while (pattern_matches.find()) { + hlsRangesList.add(hlsManifestResponse.substring(pattern_matches.start(0), + pattern_matches.end(0))); + } + + final String hlsLastRangeUrl = hlsRangesList.get(hlsRangesList.size() - 1); + final String[] hlsLastRangeUrlArray = hlsLastRangeUrl.split("/"); + + return HTTPS + hlsLastRangeUrlArray[2] + "/media/0/" + hlsLastRangeUrlArray[5] + "/" + hlsLastRangeUrlArray[6]; + } + + private static String urlEncode(final String value) { try { return URLEncoder.encode(value, UTF_8); - } catch (UnsupportedEncodingException e) { + } catch (final UnsupportedEncodingException e) { throw new IllegalStateException(e); } } From 0438828e367ad617cf6e0b97d42caa2b7ba4e518 Mon Sep 17 00:00:00 2001 From: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com> Date: Tue, 2 Mar 2021 18:20:10 +0100 Subject: [PATCH 4/8] Add a test for the number of audioStreams in CreativeCommonsPlaysWellWithOthers test It should be only two audio streams for track "Plays Well with Others, Ep 2: What Do an Army of Ants and an Online Encyclopedia Have in Common?" 
by Creative Commons (https://soundcloud.com/wearecc/plays-well-with-others-ep-2-what-do-an-army-of-ants-and-an-online-encyclopedia-have-in-common): - one which is a progressive stream, in MP3 format with a bitrate of 128 kbps - one which is an HLS stream, in OPUS format with a bitrate of 64 kbps. --- .../SoundcloudStreamExtractorTest.java | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorTest.java index f79a789852..e8705107f5 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorTest.java @@ -1,15 +1,16 @@ package org.schabi.newpipe.extractor.services.soundcloud; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; import org.schabi.newpipe.downloader.DownloaderTestImpl; +import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException; import org.schabi.newpipe.extractor.exceptions.GeographicRestrictionException; import org.schabi.newpipe.extractor.exceptions.SoundCloudGoPlusContentException; import org.schabi.newpipe.extractor.services.DefaultStreamExtractorTest; +import org.schabi.newpipe.extractor.stream.AudioStream; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.stream.StreamType; @@ -19,12 +20,14 @@ import javax.annotation.Nullable; +import static junit.framework.TestCase.assertEquals; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.MatcherAssert.assertThat; import static 
org.schabi.newpipe.extractor.ServiceList.SoundCloud; public class SoundcloudStreamExtractorTest { private static final String SOUNDCLOUD = "https://soundcloud.com/"; - @Ignore("Ignore until #526 is merged. Throwing the ContentNotSupportedException is wrong and going to be fixed by that PR.") public static class SoundcloudGeoRestrictedTrack extends DefaultStreamExtractorTest { private static final String ID = "one-touch"; private static final String UPLOADER = SOUNDCLOUD + "jessglynne"; @@ -143,6 +146,25 @@ public static void setUp() throws Exception { @Override public boolean expectedHasSubtitles() { return false; } @Override public boolean expectedHasFrames() { return false; } @Override public int expectedStreamSegmentsCount() { return 0; } - } + @Override + @Test + public void testAudioStreams() throws Exception { + super.testAudioStreams(); + final List audioStreams = extractor.getAudioStreams(); + assertEquals(2, audioStreams.size()); + for (final AudioStream audioStream : audioStreams) { + final String mediaUrl = audioStream.getUrl(); + if (audioStream.getFormat() == MediaFormat.OPUS) { + // assert that it's an OPUS 64 kbps media URL with a single range which comes from an HLS SoundCloud CDN + assertThat(mediaUrl, containsString("-hls-opus-media.sndcdn.com")); + assertThat(mediaUrl, containsString(".64.opus")); + } + if (audioStream.getFormat() == MediaFormat.MP3) { + // assert that it's a MP3 128 kbps media URL which comes from a progressive SoundCloud CDN + assertThat(mediaUrl, containsString("-media.sndcdn.com/bKOA7Pwbut93.128.mp3")); + } + } + } + } } From a7b15b51e6731f4dd7d839bd62f5dfde1a1ebd7d Mon Sep 17 00:00:00 2001 From: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com> Date: Wed, 3 Mar 2021 13:03:38 +0100 Subject: [PATCH 5/8] Change t to transcodingJsonObject in SoundcloudStreamExtractor --- .../extractors/SoundcloudStreamExtractor.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java index 5dbada6e25..0f4ef42e1d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java @@ -200,9 +200,9 @@ public List getAudioStreams() throws IOException, ExtractionExcepti boolean mp3ProgressiveStreamInTranscodings = false; for (final Object transcoding : transcodings) { - final JsonObject t = (JsonObject) transcoding; - if (t.getString("preset").contains("mp3") && - t.getObject("format").getString("protocol").equals("progressive")) { + final JsonObject transcodingJsonObject = (JsonObject) transcoding; + if (transcodingJsonObject.getString("preset").contains("mp3") && + transcodingJsonObject.getObject("format").getString("protocol").equals("progressive")) { mp3ProgressiveStreamInTranscodings = true; break; } @@ -210,11 +210,11 @@ public List getAudioStreams() throws IOException, ExtractionExcepti // Get information about what stream formats are available for (final Object transcoding : transcodings) { - final JsonObject t = (JsonObject) transcoding; + final JsonObject transcodingJsonObject = (JsonObject) transcoding; final String mediaUrl; - final String preset = t.getString("preset"); - final String protocol = t.getObject("format").getString("protocol"); - String url = t.getString("url"); + final String preset = transcodingJsonObject.getString("preset"); + final String protocol = transcodingJsonObject.getObject("format").getString("protocol"); + String url = transcodingJsonObject.getString("url"); final MediaFormat mediaFormat; final int bitrate; From d61d9d116dec8aeeb1763cf5644662f2a6c32c50 Mon Sep 17 00:00:00 2001 From: TiA4f8R 
<74829229+TiA4f8R@users.noreply.github.com> Date: Sat, 13 Mar 2021 15:26:05 +0100 Subject: [PATCH 6/8] Refactor getAudioStreams method of SoundcloudStreamExtractor Split the method into private methods, in order to have a better reading. --- .../extractors/SoundcloudStreamExtractor.java | 169 +++++++++--------- 1 file changed, 86 insertions(+), 83 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java index 0f4ef42e1d..a6abbe2792 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java @@ -184,9 +184,8 @@ public String getHlsUrl() { } @Override - public List getAudioStreams() throws IOException, ExtractionException { + public List getAudioStreams() throws ExtractionException { final List audioStreams = new ArrayList<>(); - final Downloader dl = NewPipe.getDownloader(); // Streams can be streamable and downloadable - or explicitly not. // For playing the track, it is only necessary to have a streamable track. @@ -195,99 +194,101 @@ public List getAudioStreams() throws IOException, ExtractionExcepti try { final JsonArray transcodings = track.getObject("media").getArray("transcodings"); - // Iterate a first time to see if there is a progressive MP3 stream available. - // If yes, the MP3 HLS stream will be not added to audioStreams. 
- boolean mp3ProgressiveStreamInTranscodings = false; - - for (final Object transcoding : transcodings) { - final JsonObject transcodingJsonObject = (JsonObject) transcoding; - if (transcodingJsonObject.getString("preset").contains("mp3") && - transcodingJsonObject.getObject("format").getString("protocol").equals("progressive")) { - mp3ProgressiveStreamInTranscodings = true; - break; - } + if (transcodings != null) { + // Get information about what stream formats are available + setUpAudioStreams(transcodings, checkMp3ProgressivePresence(transcodings), + audioStreams); } + } catch (final NullPointerException e) { + throw new ExtractionException("Could not get SoundCloud's tracks audio URL", e); + } - // Get information about what stream formats are available - for (final Object transcoding : transcodings) { - final JsonObject transcodingJsonObject = (JsonObject) transcoding; - final String mediaUrl; - final String preset = transcodingJsonObject.getString("preset"); - final String protocol = transcodingJsonObject.getObject("format").getString("protocol"); - String url = transcodingJsonObject.getString("url"); - final MediaFormat mediaFormat; - final int bitrate; - - if (!isNullOrEmpty(url)) { - if (preset.contains("mp3")) { - // Don't add the MP3 HLS stream if there is a progressive stream present - // because the two have the same bitrate - if (mp3ProgressiveStreamInTranscodings && protocol.equals("hls")) { - continue; - } - mediaFormat = MediaFormat.MP3; - bitrate = 128; - } else if (preset.contains("opus")) { - mediaFormat = MediaFormat.OPUS; - bitrate = 64; - } else { - // Unknown format - continue; - } + return audioStreams; + } + + private static boolean checkMp3ProgressivePresence(final JsonArray transcodings) { + boolean presence = false; + for (final Object transcoding : transcodings) { + final JsonObject transcodingJsonObject = (JsonObject) transcoding; + if (transcodingJsonObject.getString("preset").contains("mp3") && + 
transcodingJsonObject.getObject("format").getString("protocol") + .equals("progressive")) { + presence = true; + break; + } + } + return presence; + } - // TODO: move this to a separate method to generate valid urls when needed (e.g. resuming a paused stream) - - if (protocol.equals("progressive")) { - // This url points to the endpoint which generates a unique and short living url to the stream. - url += "?client_id=" + SoundcloudParsingHelper.clientId(); - final String res = dl.get(url).responseBody(); - - try { - final JsonObject mp3UrlObject = JsonParser.object().from(res); - // Links in this file are also only valid for a short period. - mediaUrl = mp3UrlObject.getString("url"); - } catch (final JsonParserException e) { - throw new ParsingException("Could not parse streamable url", e); - } - } else if (protocol.equals("hls")) { - // This url points to the endpoint which generates a unique and short living url to the stream. - url += "?client_id=" + SoundcloudParsingHelper.clientId(); - final String res = dl.get(url).responseBody(); - - try { - final JsonObject mp3HlsUrlObject = JsonParser.object().from(res); - // Links in this file are also only valid for a short period. 
- try { - mediaUrl = getSingleUrlFromHlsManifest(mp3HlsUrlObject.getString("url")); - } catch (final ParsingException e) { - // Something went during HLS manifest parsing, don't add this stream to audioStreams - continue; - } - } catch (final JsonParserException e) { - throw new ParsingException("Could not parse streamable url", e); - } - } else { - // Unknown protocol + @Nonnull + private static String getTranscodingUrl(final String endpointUrl, final String protocol) throws IOException, ExtractionException { + final Downloader downloader = NewPipe.getDownloader(); + final String apiStreamUrl = endpointUrl + "?client_id=" + SoundcloudParsingHelper.clientId(); + final String response = downloader.get(apiStreamUrl).responseBody(); + final JsonObject urlObject; + try { + urlObject = JsonParser.object().from(response); + } catch (final JsonParserException e) { + throw new ParsingException("Could not parse streamable url", e); + } + final String urlString = urlObject.getString("url"); + + if (protocol.equals("progressive")) { + return urlString; + } else if (protocol.equals("hls")) { + return getSingleUrlFromHlsManifest(urlString); + } + // else, unknown protocol + return ""; + } + + private static void setUpAudioStreams(final JsonArray transcodings, + final boolean mp3ProgressiveInStreams, + final List audioStreams) { + for (final Object transcoding : transcodings) { + final JsonObject transcodingJsonObject = (JsonObject) transcoding; + final String mediaUrl; + final String preset = transcodingJsonObject.getString("preset"); + final String protocol = transcodingJsonObject.getObject("format").getString("protocol"); + final String url = transcodingJsonObject.getString("url"); + final MediaFormat mediaFormat; + final int bitrate; + + if (!isNullOrEmpty(url)) { + if (preset.contains("mp3")) { + // Don't add the MP3 HLS stream if there is a progressive stream present + // because the two have the same bitrate + if (mp3ProgressiveInStreams && protocol.equals("hls")) { 
continue; } + mediaFormat = MediaFormat.MP3; + bitrate = 128; + } else if (preset.contains("opus")) { + mediaFormat = MediaFormat.OPUS; + bitrate = 64; + } else { + // Unknown format + continue; + } - audioStreams.add(new AudioStream(mediaUrl, mediaFormat, bitrate)); + try { + mediaUrl = getTranscodingUrl(url, protocol); + } catch (final Exception e) { + // something went wrong when parsing this transcoding + continue; } + audioStreams.add(new AudioStream(mediaUrl, mediaFormat, bitrate)); } - - } catch (final NullPointerException e) { - throw new ExtractionException("Could not get SoundCloud's track audio url", e); } - - return audioStreams; } - private final static Pattern PATTERN_WEB_URLS_IN_HLS_MANIFESTS = Pattern.compile("((http?|https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?+-=\\\\.&]*)"); + private final static Pattern PATTERN_HTTPS_URLS_IN_HLS_MANIFESTS = Pattern.compile("((https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?+-=\\\\.&]*)"); /** Parses a SoundCloud HLS manifest to get a single URL of HLS streams. *

* This method downloads the provided manifest URL, find all web occurrences using a regex, get - * the last segment URL, changes its segment range to {@code 0/track-length} and return this string. + * the last segment URL, changes its segment range to {@code 0/track-length} and return this + * string. * @param hlsManifestUrl the URL of the manifest to be parsed * @return a single URL that contains a range equal to the length of the track */ @@ -298,11 +299,12 @@ private static String getSingleUrlFromHlsManifest(final String hlsManifestUrl) t try { hlsManifestResponse = dl.get(hlsManifestUrl).responseBody(); } catch (final IOException | ReCaptchaException e) { - throw new ParsingException("Could not get SoundCloud HLS Manifest"); + throw new ParsingException("Could not get SoundCloud HLS manifest"); } final List hlsRangesList = new ArrayList<>(); - final Matcher pattern_matches = PATTERN_WEB_URLS_IN_HLS_MANIFESTS.matcher(hlsManifestResponse); + final Matcher pattern_matches = PATTERN_HTTPS_URLS_IN_HLS_MANIFESTS + .matcher(hlsManifestResponse); while (pattern_matches.find()) { hlsRangesList.add(hlsManifestResponse.substring(pattern_matches.start(0), @@ -312,7 +314,8 @@ private static String getSingleUrlFromHlsManifest(final String hlsManifestUrl) t final String hlsLastRangeUrl = hlsRangesList.get(hlsRangesList.size() - 1); final String[] hlsLastRangeUrlArray = hlsLastRangeUrl.split("/"); - return HTTPS + hlsLastRangeUrlArray[2] + "/media/0/" + hlsLastRangeUrlArray[5] + "/" + hlsLastRangeUrlArray[6]; + return HTTPS + hlsLastRangeUrlArray[2] + "/media/0/" + hlsLastRangeUrlArray[5] + "/" + + hlsLastRangeUrlArray[6]; } private static String urlEncode(final String value) { From 0e3e420a25637bcd29a6af9db14b464bc5bfd7a8 Mon Sep 17 00:00:00 2001 From: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com> Date: Sat, 13 Mar 2021 15:49:59 +0100 Subject: [PATCH 7/8] Fix tests --- .../soundcloud/extractors/SoundcloudStreamExtractor.java | 4 +++- 
.../services/soundcloud/SoundcloudStreamExtractorTest.java | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java index a6abbe2792..e363a6f260 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java @@ -37,6 +37,7 @@ public class SoundcloudStreamExtractor extends StreamExtractor { private JsonObject track; + private boolean isAvailable = true; public SoundcloudStreamExtractor(StreamingService service, LinkHandler linkHandler) { super(service, linkHandler); @@ -48,6 +49,7 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr final String policy = track.getString("policy", EMPTY_STRING); if (!policy.equals("ALLOW") && !policy.equals("MONETIZE")) { + isAvailable = false; if (policy.equals("SNIP")) { throw new SoundCloudGoPlusContentException(); } @@ -190,7 +192,7 @@ public List getAudioStreams() throws ExtractionException { // Streams can be streamable and downloadable - or explicitly not. // For playing the track, it is only necessary to have a streamable track. // If this is not the case, this track might not be published yet. 
- if (!track.getBoolean("streamable")) return audioStreams; + if (!track.getBoolean("streamable") || !isAvailable) return audioStreams; try { final JsonArray transcodings = track.getObject("media").getArray("transcodings"); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorTest.java index e8705107f5..ddf95c31e6 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorTest.java @@ -6,7 +6,6 @@ import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; -import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException; import org.schabi.newpipe.extractor.exceptions.GeographicRestrictionException; import org.schabi.newpipe.extractor.exceptions.SoundCloudGoPlusContentException; import org.schabi.newpipe.extractor.services.DefaultStreamExtractorTest; @@ -62,6 +61,7 @@ public void geoRestrictedContent() throws Exception { @Nullable @Override public String expectedTextualUploadDate() { return "2019-05-16 16:28:45"; } @Override public long expectedLikeCountAtLeast() { return -1; } @Override public long expectedDislikeCountAtLeast() { return -1; } + @Override public boolean expectedHasAudioStreams() { return false; } @Override public boolean expectedHasVideoStreams() { return false; } @Override public boolean expectedHasSubtitles() { return false; } @Override public boolean expectedHasFrames() { return false; } @@ -103,7 +103,9 @@ public void goPlusContent() throws Exception { @Nullable @Override public String expectedTextualUploadDate() { return "2016-11-11 01:16:37"; } @Override public long expectedLikeCountAtLeast() { return -1; } @Override 
public long expectedDislikeCountAtLeast() { return -1; } + @Override public boolean expectedHasAudioStreams() { return false; } @Override public boolean expectedHasVideoStreams() { return false; } + @Override public boolean expectedHasRelatedStreams() { return false; } @Override public boolean expectedHasSubtitles() { return false; } @Override public boolean expectedHasFrames() { return false; } @Override public int expectedStreamSegmentsCount() { return 0; } From 379d7312faaa5f1e21987f09a968f6590471745c Mon Sep 17 00:00:00 2001 From: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com> Date: Sun, 14 Mar 2021 17:53:08 +0100 Subject: [PATCH 8/8] Don't use a regular expression to find the last segment URL and do code improvements Apply suggestions provided in the PR and remove a redundant import. --- .../extractors/SoundcloudStreamExtractor.java | 92 +++++++++---------- 1 file changed, 44 insertions(+), 48 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java index e363a6f260..613cce2ccc 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudStreamExtractor.java @@ -29,10 +29,7 @@ import java.util.Collections; import java.util.List; import java.util.Locale; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; import static org.schabi.newpipe.extractor.utils.Utils.*; public class SoundcloudStreamExtractor extends StreamExtractor { @@ -198,7 +195,7 @@ public List getAudioStreams() throws ExtractionException { final JsonArray transcodings = track.getObject("media").getArray("transcodings"); if (transcodings != null) { // Get 
information about what stream formats are available - setUpAudioStreams(transcodings, checkMp3ProgressivePresence(transcodings), + extractAudioStreams(transcodings, checkMp3ProgressivePresence(transcodings), audioStreams); } } catch (final NullPointerException e) { @@ -238,59 +235,61 @@ private static String getTranscodingUrl(final String endpointUrl, final String p if (protocol.equals("progressive")) { return urlString; } else if (protocol.equals("hls")) { - return getSingleUrlFromHlsManifest(urlString); + try { + return getSingleUrlFromHlsManifest(urlString); + } catch (final ParsingException ignored) { + } } // else, unknown protocol return ""; } - private static void setUpAudioStreams(final JsonArray transcodings, - final boolean mp3ProgressiveInStreams, - final List audioStreams) { + private static void extractAudioStreams(final JsonArray transcodings, + final boolean mp3ProgressiveInStreams, + final List audioStreams) { for (final Object transcoding : transcodings) { final JsonObject transcodingJsonObject = (JsonObject) transcoding; + final String url = transcodingJsonObject.getString("url"); + if (isNullOrEmpty(url)) { + continue; + } final String mediaUrl; final String preset = transcodingJsonObject.getString("preset"); final String protocol = transcodingJsonObject.getObject("format").getString("protocol"); - final String url = transcodingJsonObject.getString("url"); - final MediaFormat mediaFormat; - final int bitrate; - - if (!isNullOrEmpty(url)) { - if (preset.contains("mp3")) { - // Don't add the MP3 HLS stream if there is a progressive stream present - // because the two have the same bitrate - if (mp3ProgressiveInStreams && protocol.equals("hls")) { - continue; - } - mediaFormat = MediaFormat.MP3; - bitrate = 128; - } else if (preset.contains("opus")) { - mediaFormat = MediaFormat.OPUS; - bitrate = 64; - } else { - // Unknown format + MediaFormat mediaFormat = null; + int bitrate = 0; + if (preset.contains("mp3")) { + // Don't add the MP3 HLS stream 
if there is a progressive stream present + // because the two have the same bitrate + if (mp3ProgressiveInStreams && protocol.equals("hls")) { continue; } + mediaFormat = MediaFormat.MP3; + bitrate = 128; + } else if (preset.contains("opus")) { + mediaFormat = MediaFormat.OPUS; + bitrate = 64; + } + if (mediaFormat != null) { try { mediaUrl = getTranscodingUrl(url, protocol); - } catch (final Exception e) { - // something went wrong when parsing this transcoding - continue; + if (!mediaUrl.isEmpty()) { + audioStreams.add(new AudioStream(mediaUrl, mediaFormat, bitrate)); + } + } catch (final Exception ignored) { + // something went wrong when parsing this transcoding, don't add it to + // audioStreams } - audioStreams.add(new AudioStream(mediaUrl, mediaFormat, bitrate)); } } } - private final static Pattern PATTERN_HTTPS_URLS_IN_HLS_MANIFESTS = Pattern.compile("((https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?+-=\\\\.&]*)"); - /** Parses a SoundCloud HLS manifest to get a single URL of HLS streams. *

- * This method downloads the provided manifest URL, find all web occurrences using a regex, get - * the last segment URL, changes its segment range to {@code 0/track-length} and return this - * string. + * This method downloads the provided manifest URL, find all web occurrences in the manifest, + * get the last segment URL, changes its segment range to {@code 0/track-length} and return + * this string. * @param hlsManifestUrl the URL of the manifest to be parsed * @return a single URL that contains a range equal to the length of the track */ @@ -304,20 +303,17 @@ private static String getSingleUrlFromHlsManifest(final String hlsManifestUrl) t throw new ParsingException("Could not get SoundCloud HLS manifest"); } - final List hlsRangesList = new ArrayList<>(); - final Matcher pattern_matches = PATTERN_HTTPS_URLS_IN_HLS_MANIFESTS - .matcher(hlsManifestResponse); - - while (pattern_matches.find()) { - hlsRangesList.add(hlsManifestResponse.substring(pattern_matches.start(0), - pattern_matches.end(0))); + final String[] lines = hlsManifestResponse.split("\\r?\\n"); + for (int l = lines.length - 1; l >= 0; l--) { + final String line = lines[l]; + // get the last URL from manifest, because it contains the range of the stream + if (line.trim().length() != 0 && !line.startsWith("#") && line.startsWith("https")) { + final String[] hlsLastRangeUrlArray = line.split("/"); + return HTTPS + hlsLastRangeUrlArray[2] + "/media/0/" + hlsLastRangeUrlArray[5] + "/" + + hlsLastRangeUrlArray[6]; + } } - - final String hlsLastRangeUrl = hlsRangesList.get(hlsRangesList.size() - 1); - final String[] hlsLastRangeUrlArray = hlsLastRangeUrl.split("/"); - - return HTTPS + hlsLastRangeUrlArray[2] + "/media/0/" + hlsLastRangeUrlArray[5] + "/" - + hlsLastRangeUrlArray[6]; + throw new ParsingException("Could not get any URL from HLS manifest"); } private static String urlEncode(final String value) {