LusciousRipper fixed
joroto authored and soloturn committed Nov 18, 2023
1 parent 6d7503f commit ce0e60c
Showing 1 changed file with 44 additions and 96 deletions.
140 changes: 44 additions & 96 deletions src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
@@ -1,27 +1,26 @@
 package com.rarchives.ripme.ripper.rippers;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.ripper.DownloadThreadPool;
 import com.rarchives.ripme.utils.Http;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Connection;
 import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;

 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.URL;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 public class LusciousRipper extends AbstractHTMLRipper {
-    private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
+    private static String albumid;

     private static final Pattern P = Pattern.compile("^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net/albums/([-_.0-9a-zA-Z]+)/?");
-    private final DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");

     public LusciousRipper(URL url) throws IOException {
         super(url);
@@ -38,117 +37,66 @@ public String getHost() {
     }

     @Override
-    public Document getFirstPage() throws IOException {
-        return super.getFirstPage();
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document page) {
+    public List<String> getURLsFromPage(Document page) { // gets urls for all pages through the api
         List<String> urls = new ArrayList<>();
-        Elements urlElements = page.select("div.item.thumbnail.ic_container > a");
-        for (Element e : urlElements) {
-            urls.add(e.attr("abs:href"));
-        }
+        int totalPages = 1;
+
+        for (int i = 1; i <= totalPages; i++) {
+            String APIStringWOVariables = "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=%2520query%2520PictureListInsideAlbum%28%2524input%253A%2520PictureListInput%21%29%2520%257B%2520picture%2520%257B%2520list%28input%253A%2520%2524input%29%2520%257B%2520info%2520%257B%2520...FacetCollectionInfo%2520%257D%2520items%2520%257B%2520__typename%2520id%2520title%2520description%2520created%2520like_status%2520number_of_comments%2520number_of_favorites%2520moderation_status%2520width%2520height%2520resolution%2520aspect_ratio%2520url_to_original%2520url_to_video%2520is_animated%2520position%2520permissions%2520url%2520tags%2520%257B%2520category%2520text%2520url%2520%257D%2520thumbnails%2520%257B%2520width%2520height%2520size%2520url%2520%257D%2520%257D%2520%257D%2520%257D%2520%257D%2520fragment%2520FacetCollectionInfo%2520on%2520FacetCollectionInfo%2520%257B%2520page%2520has_next_page%2520has_previous_page%2520total_items%2520total_pages%2520items_per_page%2520url_complete%2520%257D%2520&variables=";
+            Connection con = Http.url(APIStringWOVariables + encodeVariablesPartOfURL(i, albumid)).method(Connection.Method.GET).retries(5).connection();
+            con.ignoreHttpErrors(true);
+            con.ignoreContentType(true);
+            con.userAgent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0");
+            Connection.Response res;
+            try {
+                res = con.execute();
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+            String body = res.body();

-        return urls;
-    }
+            JSONObject jsonObject = new JSONObject(body);

-    @Override
-    public Document getNextPage(Document doc) throws IOException {
-        // luscious sends xhr requests to nextPageUrl and appends new set of images to the current page while in browser.
-        // Simply GET the nextPageUrl also works. Therefore, we do this...
-        Element nextPageElement = doc.select("div#next_page > div > a").first();
-        if (nextPageElement == null) {
-            throw new IOException("No next page found.");
+            JSONObject data = jsonObject.getJSONObject("data");
+            JSONObject picture = data.getJSONObject("picture");
+            JSONObject list = picture.getJSONObject("list");
+            JSONArray items = list.getJSONArray("items");
+            JSONObject info = list.getJSONObject("info");
+            totalPages = info.getInt("total_pages");
+
+            for (int j = 0; j < items.length(); j++) {
+                JSONObject item = items.getJSONObject(j);
+                String urlToOriginal = item.getString("url_to_original");
+                urls.add(urlToOriginal);
+            }
+            }
         }

-        return Http.url(nextPageElement.attr("abs:href")).get();
+        return urls;
     }

     @Override
     public String getGID(URL url) throws MalformedURLException {
         Matcher m = P.matcher(url.toExternalForm());
         if (m.matches()) {
+            albumid = m.group(1).split("_")[m.group(1).split("_").length - 1];
             return m.group(1);
         }
         throw new MalformedURLException("Expected luscious.net URL format: "
                 + "luscious.net/albums/albumname \n members.luscious.net/albums/albumname - got " + url + " instead.");
     }

     @Override
-    public void downloadURL(URL url, int index) {
-        lusciousThreadPool.addThread(new LusciousDownloadThread(url, index));
+    protected void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null);
     }

-    @Override
-    public DownloadThreadPool getThreadPool() {
-        return lusciousThreadPool;
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException {
-        // Sanitizes the url removing GET parameters and convert to legacy api url.
-        // "https://legacy.luscious.net/albums/albumname"
+    public static String encodeVariablesPartOfURL(int page, String albumId) {
         try {
-            Matcher m = P.matcher(url.toString());
-            if (m.matches()) {
-                String sanitizedUrl = m.group();
-                sanitizedUrl = sanitizedUrl.replaceFirst(
-                        "^https?://(?:members\\.|legacy\\.|www\\.)?luscious.net",
-                        "https://legacy.luscious.net");
-                return new URI(sanitizedUrl).toURL();
-            }
-
-            throw new Exception("ERROR: Unable to sanitize url.");
-        } catch (Exception e) {
-            LOGGER.info("Error sanitizing the url.");
-            LOGGER.error(e);
-            return super.sanitizeURL(url);
-        }
-    }
+            String json = "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}";

-    @Override
-    public String normalizeUrl(String url) {
-        try {
-            return url.replaceFirst(
-                    "^https?://(?:members\\.|legacy\\.)?luscious.net", "https://www.luscious.net");
-        } catch (Exception e) {
-            LOGGER.info("Error normalizing the url.");
-            LOGGER.error(e);
-            return super.normalizeUrl(url);
+            return URLEncoder.encode(json, "UTF-8");
+        } catch (UnsupportedEncodingException e) {
+            throw new IllegalStateException("Could not encode variables");
         }
     }
-
-    public class LusciousDownloadThread implements Runnable {
-        private final URL url;
-        private final int index;
-
-        public LusciousDownloadThread(URL url, int index) {
-            this.url = url;
-            this.index = index;
-        }
-
-        @Override
-        public void run() {
-            try {
-                Document page = Http.url(url).retries(RETRY_COUNT).get();
-
-                String downloadUrl = page.select(".icon-download").attr("abs:href");
-                if (downloadUrl.equals("")) {
-                    // This is here for pages with mp4s instead of images.
-                    downloadUrl = page.select("div > video > source").attr("src");
-                    if (!downloadUrl.equals("")) {
-                        throw new IOException("Could not find download url for image or video.");
-                    }
-                }
-
-                //If a valid download url was found.
-                addURLToDownload(new URI(downloadUrl).toURL(), getPrefix(index));
-
-            } catch (IOException | URISyntaxException e) {
-                LOGGER.error("Error downloadiong url " + url, e);
-            }
-        }
-
-    }
 }
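
Note on how the fix fits together: getGID() still returns the full album slug, but now also stores the slug's trailing numeric id in albumid (for a hypothetical https://www.luscious.net/albums/some_album_123456/ the GID is some_album_123456 and albumid becomes 123456). getURLsFromPage() feeds that id to the PictureListInsideAlbum GraphQL endpoint and keeps requesting pages until info.total_pages is reached, which replaces the old getFirstPage/getNextPage HTML pagination. Each url_to_original is then handed straight to addURLToDownload(), so the LusciousDownloadThread pool is no longer needed.

A minimal standalone sketch of that request/parse flow is below. It is not part of the commit: the album id is a made-up placeholder, and API_WITHOUT_VARIABLES is shortened here and must hold the full APIStringWOVariables value from getURLsFromPage above (which already ends in "&variables=").

    import org.json.JSONArray;
    import org.json.JSONObject;
    import org.jsoup.Connection;
    import org.jsoup.Jsoup;

    import java.io.IOException;
    import java.net.URLEncoder;
    import java.nio.charset.StandardCharsets;

    public class LusciousApiSketch {
        // Shortened placeholder; copy the full constant from LusciousRipper.getURLsFromPage.
        private static final String API_WITHOUT_VARIABLES =
                "https://apicdn.luscious.net/graphql/nobatch/?operationName=PictureListInsideAlbum&query=...&variables=";

        public static void main(String[] args) throws IOException {
            int page = 1;
            String albumId = "123456"; // placeholder album id

            // Same JSON that encodeVariablesPartOfURL builds, URL-encoded for the query string.
            String variables = URLEncoder.encode(
                    "{\"input\":{\"filters\":[{\"name\":\"album_id\",\"value\":\"" + albumId
                            + "\"}],\"display\":\"rating_all_time\",\"items_per_page\":50,\"page\":" + page + "}}",
                    StandardCharsets.UTF_8);

            Connection.Response res = Jsoup.connect(API_WITHOUT_VARIABLES + variables)
                    .ignoreContentType(true) // the endpoint returns JSON, not HTML
                    .ignoreHttpErrors(true)
                    .execute();

            JSONObject list = new JSONObject(res.body())
                    .getJSONObject("data")
                    .getJSONObject("picture")
                    .getJSONObject("list");

            // total_pages drives the ripper's outer loop; url_to_original is what gets queued.
            System.out.println("total_pages = " + list.getJSONObject("info").getInt("total_pages"));
            JSONArray items = list.getJSONArray("items");
            for (int i = 0; i < items.length(); i++) {
                System.out.println(items.getJSONObject(i).getString("url_to_original"));
            }
        }
    }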
