Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rendering modularization #68

Merged
merged 18 commits into from
Jan 26, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ Talk to other users of JBake on the forum:
* http://junit.org/[JUnit]
* http://pegdown.org/[Pegdown]
* http://www.eclipse.org/jetty/[Jetty Server]
* http://www.orientdb.org/[OrientDB]
* http://groovy.codehaus.org/[Groovy]

== Copyright & License

Expand Down
25 changes: 24 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@
<junit.version>4.8.1</junit.version>
<pegdown.version>1.4.1</pegdown.version>
<jetty.version>8.1.12.v20130726</jetty.version>
</properties>
<orientdb.version>1.6.2</orientdb.version>
<groovy.version>2.2.1</groovy.version>
</properties>

<build>
<finalName>${project.artifactId}</finalName>
Expand Down Expand Up @@ -203,7 +205,19 @@
<groupId>org.freemarker</groupId>
<artifactId>freemarker</artifactId>
<version>${freemarker.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.orientechnologies</groupId>
<artifactId>orient-commons</artifactId>
<version>${orientdb.version}</version>
</dependency>
<dependency>
<groupId>com.orientechnologies</groupId>
<artifactId>orientdb-core</artifactId>
<version>${orientdb.version}</version>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand All @@ -219,17 +233,26 @@
<groupId>org.pegdown</groupId>
<artifactId>pegdown</artifactId>
<version>${pegdown.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.asciidoctor</groupId>
<artifactId>asciidoctor-java-integration</artifactId>
<version>${asciidoctor.java.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>${jetty.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-all</artifactId>
<version>${groovy.version}</version>
<optional>true</optional>
</dependency>
<!-- sl4j Logging -->
<!-- <dependency>
<groupId>org.slf4j</groupId>
Expand Down
265 changes: 156 additions & 109 deletions src/main/java/org/jbake/app/Crawler.java
Original file line number Diff line number Diff line change
@@ -1,125 +1,172 @@
package org.jbake.app;

import static java.io.File.separator;
import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx;
import com.orientechnologies.orient.core.record.impl.ODocument;
import com.orientechnologies.orient.core.sql.query.OSQLSynchQuery;
import org.apache.commons.configuration.CompositeConfiguration;
import org.jbake.model.DocumentStatus;
import org.jbake.model.DocumentTypes;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.configuration.CompositeConfiguration;
import static java.io.File.separator;

/**
* Crawls a file system looking for content.
*
 * @author Jonathan Bullock <jonbullock@gmail.com>
*/
public class Crawler {

// TODO: replace separate lists with custom impl of hashmap that provides methods
// TODO: to get back certain types of content (i.e. pages or posts), this allows for
// TODO: support of extra types with very little extra dev

private CompositeConfiguration config;
private Parser parser;

private List<Map<String, Object>> pages = new ArrayList<Map<String, Object>>();
private List<Map<String, Object>> posts = new ArrayList<Map<String, Object>>();
private Map<String, List<Map<String, Object>>> postsByTags = new HashMap<String, List<Map<String, Object>>>();
// private Map<String, List<Map<String, Object>>> postsByarchive = new HashMap<String, List<Map<String, Object>>>();
private String contentPath;

/**
* Creates new instance of Crawler.
*
*/
public Crawler(File source, CompositeConfiguration config) {
this.config = config;
this.contentPath = source.getPath() + separator + config.getString("content.folder");
this.parser = new Parser(config,contentPath);
}

/**
* Crawl all files and folders looking for content.
*
* @param path Folder to start from
*/
public void crawl(File path) {
File[] contents = path.listFiles(FileUtil.getFileFilter());
if (contents != null) {
Arrays.sort(contents);
for (int i = 0; i < contents.length; i++) {
if (contents[i].isFile()) {
System.out.print("Processing [" + contents[i].getPath() + "]... ");
Map<String, Object> fileContents = parser.processFile(contents[i]);
if (fileContents != null) {
fileContents.put("file", contents[i].getPath());
String uri = contents[i].getPath().replace(contentPath, "");
uri = uri.substring(0, uri.lastIndexOf("."));
fileContents.put("uri", uri+config.getString("output.extension"));

if (fileContents.get("type").equals("page")) {
pages.add(fileContents);
} else {
// everything else is considered a post
posts.add(fileContents);
if (fileContents.get("tags") != null) {
String[] tags = (String[]) fileContents.get("tags");
for (String tag : tags) {
if (postsByTags.containsKey(tag)) {
postsByTags.get(tag).add(fileContents);
} else {
List<Map<String, Object>> posts = new ArrayList<Map<String, Object>>();
posts.add(fileContents);
postsByTags.put(tag, posts);
}
}
}

if (fileContents.get("status").equals("published-date")) {
if (fileContents.get("date") != null && (fileContents.get("date") instanceof Date)) {
if (new Date().after((Date)fileContents.get("date"))) {
fileContents.put("status", "published");
}
}
}
}
System.out.println("done!");
}
}

if (contents[i].isDirectory()) {
crawl(contents[i]);
}
}
}
}

public List<Map<String, Object>> getPages() {
return pages;
}

public void setPages(List<Map<String, Object>> pages) {
this.pages = pages;
}

public List<Map<String, Object>> getPosts() {
return posts;
}

public void setPosts(List<Map<String, Object>> posts) {
this.posts = posts;
}

public Map<String, List<Map<String, Object>>> getPostsByTags() {
return postsByTags;
}

public void setPostsByTags(Map<String, List<Map<String, Object>>> postsByTags) {
this.postsByTags = postsByTags;
}

private CompositeConfiguration config;
private Parser parser;
private final ODatabaseDocumentTx db;
private String contentPath;

/**
* Creates new instance of Crawler.
*/
public Crawler(ODatabaseDocumentTx db, File source, CompositeConfiguration config) {
this.db = db;
this.config = config;
this.contentPath = source.getPath() + separator + config.getString("content.folder");
this.parser = new Parser(config, contentPath);
}

/**
* Crawl all files and folders looking for content.
*
* @param path Folder to start from
*/
public void crawl(File path) {
File[] contents = path.listFiles(FileUtil.getFileFilter());
if (contents != null) {
Arrays.sort(contents);
for (File sourceFile : contents) {
if (sourceFile.isFile()) {
System.out.print("Processing [" + sourceFile.getPath() + "]... ");
String sha1 = buildHash(sourceFile);
String uri = buildURI(sourceFile);
boolean process = true;
DocumentStatus status = DocumentStatus.NEW;
for (String docType : DocumentTypes.getDocumentTypes()) {
status = findDocumentStatus(docType, uri, sha1);
switch (status) {
case UPDATED:
System.out.print(" : modified ");
DBUtil.update(db, "delete from " + docType + " where uri=?", uri);
break;
case IDENTICAL:
System.out.print(" : same ");
process = false;
}
if (!process) {
break;
}
}
if (DocumentStatus.NEW == status) {
System.out.print(" : new ");
}
if (process) { // new or updated
crawlSourceFile(sourceFile, sha1, uri);
}
}
if (sourceFile.isDirectory()) {
crawl(sourceFile);
} else {
System.out.println("done!");
}
}
}
}

private String buildHash(final File sourceFile) {
String sha1;
try {
sha1 = FileUtil.sha1(sourceFile);
} catch (Exception e) {
e.printStackTrace();
sha1 = "";
}
return sha1;
}

private String buildURI(final File sourceFile) {
String uri = sourceFile.getPath().replace(contentPath, "");
uri = uri.substring(0, uri.lastIndexOf(".")) + config.getString("output.extension");
return uri;
}

private void crawlSourceFile(final File sourceFile, final String sha1, final String uri) {
Map<String, Object> fileContents = parser.processFile(sourceFile);
fileContents.put("sha1", sha1);
fileContents.put("rendered", false);
if (fileContents != null) {
if (fileContents.get("tags") != null) {
// store them as a String[]
String[] tags = (String[]) fileContents.get("tags");
fileContents.put("tags", tags);
}
fileContents.put("file", sourceFile.getPath());
fileContents.put("uri", uri);

String documentType = (String) fileContents.get("type");
if (fileContents.get("status").equals("published-date")) {
if (fileContents.get("date") != null && (fileContents.get("date") instanceof Date)) {
if (new Date().after((Date) fileContents.get("date"))) {
fileContents.put("status", "published");
}
}
}
ODocument doc = new ODocument(documentType);
doc.fields(fileContents);
boolean cached = fileContents.get("cached") != null ? Boolean.valueOf((String)fileContents.get("cached")):true;
doc.field("cached", cached);
doc.save();
}
}

public int getDocumentCount(String docType) {
return (int) db.countClass(docType);
}

public int getPostCount() {
return getDocumentCount("post");
}

public int getPageCount() {
return getDocumentCount("page");
}

public Set<String> getTags() {
List<ODocument> query = db.query(new OSQLSynchQuery<ODocument>("select tags from post where status='published'"));
Set<String> result = new HashSet<String>();
for (ODocument document : query) {
String[] tags = DBUtil.toStringArray(document.field("tags"));
Collections.addAll(result, tags);
}
return result;
}

private DocumentStatus findDocumentStatus(String docType, String uri, String sha1) {
List<ODocument> match = DBUtil.query(db, "select sha1,rendered from " + docType + " where uri=?", uri);
if (!match.isEmpty()) {
ODocument entries = match.get(0);
String oldHash = entries.field("sha1");
if (!(oldHash.equals(sha1)) || Boolean.FALSE.equals(entries.field("rendered"))) {
return DocumentStatus.UPDATED;
} else {
return DocumentStatus.IDENTICAL;
}
} else {
return DocumentStatus.NEW;
}
}
}
Loading