diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java
new file mode 100644
index 000000000..40169e045
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java
@@ -0,0 +1,161 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+
+import com.rarchives.ripme.ripper.AbstractRipper;
+import com.rarchives.ripme.utils.Utils;
+
+/**
+ * Rips the images that the rarchives "gonewild" API lists for one reddit user.
+ *
+ * Posts are requested page by page from
+ * {@code http://<gw.api>.rarchives.com/api.cgi}; every image path found in
+ * them is handed to addURLToDownload().
+ */
+public class GonewildRipper extends AbstractRipper {
+
+    private static final String HOST = "gonewild";
+    private static final Logger logger = Logger.getLogger(GonewildRipper.class);
+    /** Pause between two consecutive API page requests, in milliseconds. */
+    private static final int SLEEP_TIME = 1000;
+    /** Number of posts requested per API page. */
+    private static final int POSTS_PER_PAGE = 50;
+
+    /**
+     * Matches reddit user pages; group 1 is the username. Compiled once instead
+     * of on every canRip()/getGID() call. The name may contain '_' and must be
+     * followed by the end of the URL or by '/', '?' or '#', so a longer name is
+     * never silently cut down to its matching prefix.
+     */
+    private static final Pattern USER_PATTERN = Pattern.compile(
+            "^https?://[a-z]{0,3}\\.?reddit\\.com/(?:u|user)/([a-zA-Z0-9_\\-]{3,})(?:[/?#].*)?$");
+
+    /** Sub-domain of rarchives.com that serves the API (setting "gw.api"). */
+    private final String apiDomain;
+    /** Reddit user being ripped; null until a matching URL has been seen. */
+    private String username;
+
+    /**
+     * @param url reddit user page, e.g. http://reddit.com/u/name
+     * @throws IOException propagated from the AbstractRipper constructor
+     */
+    public GonewildRipper(URL url) throws IOException {
+        super(url);
+        apiDomain = Utils.getConfigString("gw.api", "gonewild");
+        // Do not rely on getGID() having been called before rip(): resolve the
+        // user here too. Stays null when the URL is not a reddit user page.
+        if (username == null) {
+            username = extractUsername(url);
+        }
+    }
+
+    /** Tells whether the URL is a reddit user page this ripper understands. */
+    @Override
+    public boolean canRip(URL url) {
+        return getUsernameMatcher(url).matches();
+    }
+
+    private Matcher getUsernameMatcher(URL url) {
+        return USER_PATTERN.matcher(url.toExternalForm());
+    }
+
+    /** Returns the username in the URL, or null if it is not a user page. */
+    private String extractUsername(URL url) {
+        Matcher m = getUsernameMatcher(url);
+        return m.matches() ? m.group(1) : null;
+    }
+
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        return url;
+    }
+
+    /**
+     * Pages through the user's posts, queues every image for download and
+     * finally calls waitForThreads().
+     *
+     * @throws IOException if no username is known or an API request fails
+     */
+    @Override
+    public void rip() throws IOException {
+        if (username == null) {
+            throw new IOException("No reddit username could be determined from the URL");
+        }
+        String apiRoot = "http://" + apiDomain + ".rarchives.com";
+        String baseGwURL = apiRoot + "/api.cgi"
+                         + "?method=get_user"
+                         + "&user=" + username
+                         + "&count=" + POSTS_PER_PAGE;
+        boolean interrupted = false;
+        for (int start = 0; ; start += POSTS_PER_PAGE) {
+            logger.info(" Retrieving posts by " + username);
+            String jsonString = Jsoup.connect(baseGwURL + "&start=" + start)
+                                     .ignoreContentType(true)
+                                     .execute()
+                                     .body();
+            JSONArray posts = new JSONObject(jsonString).getJSONArray("posts");
+            if (posts.length() == 0) {
+                break; // No more posts to get
+            }
+            for (int i = 0; i < posts.length(); i++) {
+                queueImages(posts.getJSONObject(i), apiRoot);
+            }
+            try {
+                Thread.sleep(SLEEP_TIME);
+            } catch (InterruptedException e) {
+                logger.error("[!] Interrupted while waiting to load more posts", e);
+                interrupted = true;
+                break;
+            }
+        }
+        waitForThreads();
+        if (interrupted) {
+            // Re-assert the interrupt only after waitForThreads(), so the
+            // downloads queued so far are handled exactly as before.
+            Thread.currentThread().interrupt();
+        }
+    }
+
+    /** Queues every image of one post; image paths are relative to apiRoot. */
+    private void queueImages(JSONObject post, String apiRoot) throws IOException {
+        JSONArray images = post.getJSONArray("images");
+        for (int j = 0; j < images.length(); j++) {
+            String imagePath = images.getJSONObject(j).getString("path");
+            if (imagePath.startsWith("..")) {
+                imagePath = imagePath.substring(2);
+            }
+            imagePath = apiRoot + imagePath;
+            logger.info(" Found file: " + imagePath);
+            addURLToDownload(new URL(imagePath));
+        }
+    }
+
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    /**
+     * Returns the reddit username found in the URL and remembers it for rip().
+     *
+     * @throws MalformedURLException if the URL is not a reddit user page
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        String name = extractUsername(url);
+        if (name == null) {
+            throw new MalformedURLException("Not a reddit user URL: " + url);
+        }
+        this.username = name;
+        return name;
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index 02a44fadd..24ca8ddfa 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -80,7 +80,10 @@ public void rip() throws IOException {
         while (true) {
             String url = baseURL + params;
             logger.info(" Retrieving " + url);
-            String jsonString = Jsoup.connect(url).ignoreContentType(true).execute().body();
+            String jsonString = Jsoup.connect(url)
+                                     .ignoreContentType(true)
+                                     .execute()
+                                     .body();
             JSONObject json = new JSONObject(jsonString);
             JSONArray datas = json.getJSONArray("data");
             String nextMaxID = "";
diff --git a/src/main/resources/rip.properties
b/src/main/resources/rip.properties
index 2fbcf1134..b567b4af8 100644
--- a/src/main/resources/rip.properties
+++ b/src/main/resources/rip.properties
@@ -1,4 +1,5 @@
 threads.size = 5
 file.overwrite = false
 download.retries = 3
-twitter.auth = VW9Ybjdjb1pkd2J0U3kwTUh2VXVnOm9GTzVQVzNqM29LQU1xVGhnS3pFZzhKbGVqbXU0c2lHQ3JrUFNNZm8=
\ No newline at end of file
+twitter.auth = VW9Ybjdjb1pkd2J0U3kwTUh2VXVnOm9GTzVQVzNqM29LQU1xVGhnS3pFZzhKbGVqbXU0c2lHQ3JrUFNNZm8=
+gw.api = gonewild
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java
new file mode 100644
index 000000000..f9f748aea
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/GonewildRipperTest.java
@@ -0,0 +1,39 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.rarchives.ripme.ripper.rippers.GonewildRipper;
+
+/** Live download test for GonewildRipper; skipped unless DOWNLOAD_CONTENT is set. */
+public class GonewildRipperTest extends RippersTest {
+
+    /** Rips a known user page and checks that more than one file arrived. */
+    public void testGonewildAlbums() throws IOException {
+        if (!DOWNLOAD_CONTENT) {
+            return;
+        }
+        List<URL> contentURLs = new ArrayList<URL>();
+        contentURLs.add(new URL("http://reddit.com/u/amle69"));
+        for (URL url : contentURLs) {
+            try {
+                GonewildRipper ripper = new GonewildRipper(url);
+                ripper.rip();
+                // Explicit check instead of the assert keyword, which is a
+                // no-op unless the JVM runs with -ea.
+                File[] files = ripper.getWorkingDir().listFiles();
+                if (files == null || files.length <= 1) {
+                    fail("Expected more than one downloaded file for " + url);
+                }
+                deleteDir(ripper.getWorkingDir());
+            } catch (Exception e) {
+                e.printStackTrace();
+                fail("Error while ripping URL " + url + ": " + e.getMessage());
+            }
+        }
+    }
+
+}