Skip to content

Commit

Permalink
write sitemap to docroot #4261
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed Sep 25, 2018
1 parent afe3d0f commit 61231b2
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 4 deletions.
2 changes: 1 addition & 1 deletion doc/sphinx-guides/source/installation/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ Provide Sitemap URL to Search Engines

Search engines have an easier time indexing content when you provide them a sitemap.

Adjust the URL https://demo.dataverse.org/sitemap.xml for your installation of Dataverse when following `Google's "submit a sitemap" instructions`_ or similar instructions for other search engines.
Adjust the URL https://demo.dataverse.org/sitemap/sitemap.xml for your installation of Dataverse when following `Google's "submit a sitemap" instructions`_ or similar instructions for other search engines.

.. _Google's "submit a sitemap" instructions: https://support.google.com/webmasters/answer/183668

Expand Down
29 changes: 26 additions & 3 deletions src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.File;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
Expand All @@ -16,11 +18,27 @@
import org.w3c.dom.Document;
import org.w3c.dom.Element;

// We are aware of https://github.com/dfabulich/sitemapgen4j but haven't tried it.
public class SiteMapUtil {

static final String SITEMAP_OUTFILE = "/tmp/out.xml";
private static final Logger logger = Logger.getLogger(SiteMapUtil.class.getCanonicalName());

public static void updateSiteMap() throws ParserConfigurationException, TransformerException {
static final String SITEMAP_FILENAME = "sitemap.xml";

public static void updateSiteMap() throws ParserConfigurationException, TransformerException, IOException {

String sitemapPath = "/tmp";
String sitemapPathAndFile;
// i.e. /usr/local/glassfish4/glassfish/domains/domain1
String domainRoot = System.getProperty("com.sun.aas.instanceRoot");
if (domainRoot != null) {
// TODO: Can the extra "sitemap" directory be avoided? In glassfish-web.xml we map a
// directory rather than a single file because a directory seems to be required: if
// only a file is mapped, the war will fail to deploy when that file doesn't exist.
// http://harkiran-howtos.blogspot.com/2009/08/map-external-directory-into-glassfish.html
sitemapPath = domainRoot + File.separator + "docroot" + File.separator + "sitemap";
}
sitemapPathAndFile = sitemapPath + File.separator + SITEMAP_FILENAME;

DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
Expand Down Expand Up @@ -51,7 +69,12 @@ public static void updateSiteMap() throws ParserConfigurationException, Transfor
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
DOMSource source = new DOMSource(document);
StreamResult result = new StreamResult(new File(SITEMAP_OUTFILE));
File directory = new File(sitemapPath);
if (!directory.exists()) {
directory.mkdir();
}
logger.info("Writing sitemap to " + sitemapPathAndFile);
StreamResult result = new StreamResult(new File(sitemapPathAndFile));
transformer.transform(source, result);

// TODO: Remove this once there's a lot of data.
Expand Down
1 change: 1 addition & 0 deletions src/main/webapp/WEB-INF/glassfish-web.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
<property name="alternatedocroot_1" value="from=/guides/* dir=./docroot"/>
<property name="alternatedocroot_2" value="from=/dataexplore/* dir=./docroot"/>
<property name="alternatedocroot_logos" value="from=/logos/* dir=./docroot"/>
<property name="alternatedocroot_sitemap" value="from=/sitemap/* dir=./docroot"/>
<parameter-encoding default-charset="UTF-8"/>
</glassfish-web-app>

0 comments on commit 61231b2

Please sign in to comment.