Skip to content

Commit

Permalink
Merge 3f869ea into 1afb8ed
Browse files Browse the repository at this point in the history
  • Loading branch information
erik authored Jun 8, 2023
2 parents 1afb8ed + 3f869ea commit e728c44
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,25 @@
import javax.annotation.concurrent.Immutable;

/**
* A utility to download {@code planet.osm.pbf} files from <a href="https://registry.opendata.aws/osm/">AWS Open Data
* Registry</a>.
* A utility to download {@code planet.osm.pbf} files from public S3 sources such as
* <a href="https://registry.opendata.aws/osm/">AWS Open Data Registry</a> and
* <a href="https://overturemaps.org">Overture Maps Foundation</a>.
*/
public class AwsOsm {

private static final String BASE = "https://osm-pds.s3.amazonaws.com/";
private static volatile IndexXml index = null;
public static final AwsOsm OSM_PDS = new AwsOsm("https://osm-pds.s3.amazonaws.com/");
public static final AwsOsm OVERTURE = new AwsOsm("https://overturemaps-us-west-2.s3.amazonaws.com/");
private static final ObjectMapper mapper = new XmlMapper().registerModule(new Jdk8Module());

private final String bucketIndexUrl;
private volatile IndexXml index = null;

/**
 * Creates a lookup helper for a single public S3 bucket.
 * <p>
 * The URL is used both to fetch the bucket index and as the prefix that object keys are appended to when building
 * download URLs, so it is normalized to always end with {@code /}.
 *
 * @param bucketIndexUrl root URL of the bucket, for example {@code https://osm-pds.s3.amazonaws.com/}; a missing
 *                       trailing slash is added
 * @throws NullPointerException if {@code bucketIndexUrl} is {@code null}
 */
protected AwsOsm(String bucketIndexUrl) {
  // Keys are concatenated directly onto this value, so without the separator "https://host" + "2021/x.osm.pbf"
  // would silently become "https://host2021/x.osm.pbf".
  this.bucketIndexUrl = bucketIndexUrl.endsWith("/") ? bucketIndexUrl : bucketIndexUrl + "/";
}

/**
* Fetches the AWS Open Data Registry index and searches for a {@code .osm.pbf} resource to download where snapshot
* date matches {@code searchQuery}, or the latest snapshot if {@code searchQuery == "latest"}.
* Fetches the S3 bucket index and searches for a {@code .osm.pbf} resource to download whose snapshot date matches
* {@code searchQuery}, or the latest snapshot if {@code searchQuery} is {@code "latest"} (case-insensitive).
* <p>
* The index is fetched only once per instance and cached after that.
*
Expand All @@ -34,14 +41,14 @@ public class AwsOsm {
* @return the URL of a {@code .osm.pbf} file with name or snapshot ID matching {@code searchQuery}
* @throws IllegalArgumentException if no match is found, or if more than one match is found.
*/
public static String getDownloadUrl(String searchQuery, PlanetilerConfig config) {
IndexXml index = getAndCacheIndex(config);
return searchIndexForDownloadUrl(searchQuery, index);
public String getDownloadUrl(String searchQuery, PlanetilerConfig config) {
IndexXml indexXml = getAndCacheIndex(config);
return searchIndexForDownloadUrl(searchQuery, indexXml);
}

private synchronized static IndexXml getAndCacheIndex(PlanetilerConfig config) {
private synchronized IndexXml getAndCacheIndex(PlanetilerConfig config) {
if (index == null) {
try (InputStream inputStream = Downloader.openStream(BASE, config)) {
try (InputStream inputStream = Downloader.openStream(bucketIndexUrl, config)) {
index = parseIndexXml(inputStream);
} catch (IOException e) {
throw new IllegalStateException(e);
Expand All @@ -50,21 +57,21 @@ private synchronized static IndexXml getAndCacheIndex(PlanetilerConfig config) {
return index;
}

static IndexXml parseIndexXml(InputStream indexXmlContent) throws IOException {
protected IndexXml parseIndexXml(InputStream indexXmlContent) throws IOException {
return mapper.readValue(indexXmlContent, IndexXml.class);
}

static String searchIndexForDownloadUrl(String searchQuery, IndexXml index) {
protected String searchIndexForDownloadUrl(String searchQuery, IndexXml index) {
if ("latest".equalsIgnoreCase(searchQuery)) {
return index.contents.stream()
.filter(c -> c.key.endsWith(".osm.pbf"))
.map(c -> BASE + c.key)
.map(c -> bucketIndexUrl + c.key)
.max(Comparator.naturalOrder())
.orElseThrow(() -> new IllegalArgumentException("Unable to find latest AWS osm download URL"));
} else {
List<String> results = index.contents.stream()
.filter(c -> c.key.endsWith("/planet-" + searchQuery + ".osm.pbf"))
.map(c -> BASE + c.key)
.map(c -> bucketIndexUrl + c.key)
.toList();
if (results.isEmpty()) {
throw new IllegalArgumentException("Unable to find AWS osm download URL for " + searchQuery);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@
* a {@code .osm.pbf} download URL in the <a href="https://download.geofabrik.de/technical.html">Geofabrik JSON
* index</a>.
* <p>
* You can also use "aws:latest" to download the latest {@code planet.osm.pbf} file from the
* <a href="https://registry.opendata.aws/osm/">AWS Open Data Registry</a>.
* Use "aws:latest" to download the latest {@code planet.osm.pbf} file from the
* <a href="https://registry.opendata.aws/osm/">AWS Open Data Registry</a>, or "overture:latest" to download the latest
* <a href="https://overturemaps.org/">Overture Maps Foundation</a> release.
*/
@SuppressWarnings("UnusedReturnValue")
public class Downloader {
Expand Down Expand Up @@ -140,18 +141,20 @@ InputStream openStreamRange(String url, long start, long end) throws IOException
* {@code HEAD} request to the resource.
*
* @param id short name to use for this download when logging progress
* @param url the external resource to fetch, "aws:latest" (for the latest planet .osm.pbf), or "geofabrik:extract
* name" as a shortcut to use {@link Geofabrik#getDownloadUrl(String, PlanetilerConfig)} to look up a
* {@code .osm.pbf} <a href="https://download.geofabrik.de/">Geofabrik</a> extract URL by partial match
* on area name
* @param url the external resource to fetch, "aws:latest" (for the latest planet .osm.pbf), "overture:latest" (for
* the latest Overture Maps release) or "geofabrik:extract-name" as a shortcut to use
* {@link Geofabrik#getDownloadUrl(String, PlanetilerConfig)} to look up a {@code .osm.pbf}
* <a href="https://download.geofabrik.de/">Geofabrik</a> extract URL by partial match on area name
* @param output where to download the file to
* @return {@code this} for chaining
*/
public Downloader add(String id, String url, Path output) {
if (url.startsWith("geofabrik:")) {
url = Geofabrik.getDownloadUrl(url.replaceFirst("^geofabrik:", ""), config);
} else if (url.startsWith("aws:")) {
url = AwsOsm.getDownloadUrl(url.replaceFirst("^aws:", ""), config);
url = AwsOsm.OSM_PDS.getDownloadUrl(url.replaceFirst("^aws:", ""), config);
} else if (url.startsWith("overture:")) {
url = AwsOsm.OVERTURE.getDownloadUrl(url.replaceFirst("^overture:", ""), config);
}
toDownloadList.add(new ResourceToDownload(id, url, output));
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,26 +163,133 @@ class AwsOsmTest {
</ListBucketResult>
""".getBytes(StandardCharsets.UTF_8);

// Sample S3 bucket listing (ListBucketResult XML) for the "overturemaps-us-west-2" bucket, encoded as UTF-8 bytes.
// It is a single, non-truncated page with seven entries: six building-extract ".geojsonseq.gz" objects and one
// "planet-2023-04-02-alpha.osm.pbf" object, so a lookup must ignore the extracts and pick the lone .osm.pbf key.
private static final byte[] overtureResponse =
"""
<?xml version="1.0" encoding="UTF-8"?>
<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
<Name>overturemaps-us-west-2</Name>
<Prefix />
<Marker />
<MaxKeys>1000</MaxKeys>
<IsTruncated>false</IsTruncated>
<Contents>
<Key>release/2023-04-02-alpha/building-extracts/2023-04-02-alpha-cook_county_il.geojsonseq.gz</Key>
<LastModified>2023-04-13T16:47:32.000Z</LastModified>
<ETag>"89e67e64e866c56db9abe700652bd253-17"</ETag>
<Size>138598884</Size>
<Owner>
<ID>15b7f69019aeb60070a7e5e0abef900903bcad93b2cd252fc9ac14881f1cea14</ID>
<DisplayName>sethfitz+overture-data-distribution</DisplayName>
</Owner>
<StorageClass>INTELLIGENT_TIERING</StorageClass>
</Contents>
<Contents>
<Key>release/2023-04-02-alpha/building-extracts/2023-04-02-alpha-eastern_ma.geojsonseq.gz</Key>
<LastModified>2023-04-13T16:47:57.000Z</LastModified>
<ETag>"983fbf7d59ea77478851059edf20ae82-17"</ETag>
<Size>139513514</Size>
<Owner>
<ID>15b7f69019aeb60070a7e5e0abef900903bcad93b2cd252fc9ac14881f1cea14</ID>
<DisplayName>sethfitz+overture-data-distribution</DisplayName>
</Owner>
<StorageClass>INTELLIGENT_TIERING</StorageClass>
</Contents>
<Contents>
<Key>release/2023-04-02-alpha/building-extracts/2023-04-02-alpha-king_county_wa.geojsonseq.gz</Key>
<LastModified>2023-04-13T16:48:49.000Z</LastModified>
<ETag>"fb9b875482136a1d77b8722aa0ccdd49-8"</ETag>
<Size>60249069</Size>
<Owner>
<ID>15b7f69019aeb60070a7e5e0abef900903bcad93b2cd252fc9ac14881f1cea14</ID>
<DisplayName>sethfitz+overture-data-distribution</DisplayName>
</Owner>
<StorageClass>INTELLIGENT_TIERING</StorageClass>
</Contents>
<Contents>
<Key>release/2023-04-02-alpha/building-extracts/2023-04-02-alpha-maricopa_and_pinal_counties_az.geojsonseq.gz</Key>
<LastModified>2023-04-13T16:49:12.000Z</LastModified>
<ETag>"fd0ac4f256e5e4937abdc471486637f6-15"</ETag>
<Size>125059646</Size>
<Owner>
<ID>15b7f69019aeb60070a7e5e0abef900903bcad93b2cd252fc9ac14881f1cea14</ID>
<DisplayName>sethfitz+overture-data-distribution</DisplayName>
</Owner>
<StorageClass>INTELLIGENT_TIERING</StorageClass>
</Contents>
<Contents>
<Key>release/2023-04-02-alpha/building-extracts/2023-04-02-alpha-orange_county_fl.geojsonseq.gz</Key>
<LastModified>2023-04-13T16:49:59.000Z</LastModified>
<ETag>"3e1aa7ac82267d947daf3541f91cdb6d-4"</ETag>
<Size>25542787</Size>
<Owner>
<ID>15b7f69019aeb60070a7e5e0abef900903bcad93b2cd252fc9ac14881f1cea14</ID>
<DisplayName>sethfitz+overture-data-distribution</DisplayName>
</Owner>
<StorageClass>INTELLIGENT_TIERING</StorageClass>
</Contents>
<Contents>
<Key>release/2023-04-02-alpha/building-extracts/2023-04-02-alpha-santa_clara_county_ca.geojsonseq.gz</Key>
<LastModified>2023-04-13T16:50:10.000Z</LastModified>
<ETag>"1a104df658f2667107a5d20ac4b503f2-8"</ETag>
<Size>64331924</Size>
<Owner>
<ID>15b7f69019aeb60070a7e5e0abef900903bcad93b2cd252fc9ac14881f1cea14</ID>
<DisplayName>sethfitz+overture-data-distribution</DisplayName>
</Owner>
<StorageClass>INTELLIGENT_TIERING</StorageClass>
</Contents>
<Contents>
<Key>release/2023-04-02-alpha/planet-2023-04-02-alpha.osm.pbf</Key>
<LastModified>2023-04-05T16:24:04.000Z</LastModified>
<ETag>"a45f7016445256b2735f7765c7668e87-5496"</ETag>
<Size>92196566177</Size>
<Owner>
<ID>15b7f69019aeb60070a7e5e0abef900903bcad93b2cd252fc9ac14881f1cea14</ID>
<DisplayName>sethfitz+overture-data-distribution</DisplayName>
</Owner>
<StorageClass>INTELLIGENT_TIERING</StorageClass>
</Contents>
</ListBucketResult>
"""
.getBytes(StandardCharsets.UTF_8);

@Test
void testFound() throws IOException {
var index = AwsOsm.parseIndexXml(new ByteArrayInputStream(response));
assertEquals("https://osm-pds.s3.amazonaws.com/2021/planet-210906.osm.pbf",
AwsOsm.searchIndexForDownloadUrl("210906", index));
assertEquals("https://osm-pds.s3.amazonaws.com/2021/planet-210830.osm.pbf",
AwsOsm.searchIndexForDownloadUrl("210830", index));
var awsOsm = new AwsOsm("https://base.url/");
var index = awsOsm.parseIndexXml(new ByteArrayInputStream(response));
assertEquals("https://base.url/2021/planet-210906.osm.pbf",
awsOsm.searchIndexForDownloadUrl("210906", index));
assertEquals("https://base.url/2021/planet-210830.osm.pbf",
awsOsm.searchIndexForDownloadUrl("210830", index));
}

@Test
void testLatest() throws IOException {
var index = AwsOsm.parseIndexXml(new ByteArrayInputStream(response));
String url = AwsOsm.searchIndexForDownloadUrl("latest", index);
assertEquals("https://osm-pds.s3.amazonaws.com/2021/planet-210906.osm.pbf", url);
var awsOsm = new AwsOsm("https://base.url/");
var index = awsOsm.parseIndexXml(new ByteArrayInputStream(response));
String url = awsOsm.searchIndexForDownloadUrl("latest", index);
assertEquals("https://base.url/2021/planet-210906.osm.pbf", url);
}

@Test
void testNotFound() throws IOException {
var index = AwsOsm.parseIndexXml(new ByteArrayInputStream(response));
var awsOsm = new AwsOsm("https://base.url/");
var index = awsOsm.parseIndexXml(new ByteArrayInputStream(response));
assertThrows(IllegalArgumentException.class,
() -> AwsOsm.searchIndexForDownloadUrl("1231", index));
() -> awsOsm.searchIndexForDownloadUrl("1231", index));
}

@Test
void testOvertureMaps() throws IOException {
  // The listing holds several building extracts but only one planet file, so both the "latest" lookup and the
  // explicit release name must resolve to that same URL.
  final String expectedUrl = "https://base.url/release/2023-04-02-alpha/planet-2023-04-02-alpha.osm.pbf";
  var client = new AwsOsm("https://base.url/");
  var overtureIndex = client.parseIndexXml(new ByteArrayInputStream(overtureResponse));

  String[] queries = {"latest", "2023-04-02-alpha"};
  for (String query : queries) {
    assertEquals(expectedUrl, client.searchIndexForDownloadUrl(query, overtureIndex));
  }
}
}

0 comments on commit e728c44

Please sign in to comment.