From 70cfc7a57c9faac1aa780d2a64132c697ecfefed Mon Sep 17 00:00:00 2001 From: Mike Barry Date: Tue, 8 Feb 2022 05:26:54 -0500 Subject: [PATCH 1/5] check space before downloading --- .../com/onthegomap/planetiler/Planetiler.java | 2 +- .../planetiler/config/PlanetilerConfig.java | 2 +- .../planetiler/util/Downloader.java | 34 +++++++++++++++--- .../onthegomap/planetiler/util/Format.java | 5 +++ .../planetiler/util/DownloaderTest.java | 35 ++++++++++++++++--- 5 files changed, 68 insertions(+), 10 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java index 96a826417a..c162bd9ac0 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java @@ -444,7 +444,7 @@ public void run() throws Exception { System.exit(0); } else if (onlyDownloadSources) { // don't check files if not generating map - } else if (overwrite || config.forceOverwrite()) { + } else if (overwrite || config.force()) { FileUtils.deleteFile(output); } else if (Files.exists(output)) { throw new IllegalArgumentException(output + " already exists, use the --force argument to overwrite."); diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java index 27b7cd666a..de8cb3e290 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java @@ -15,7 +15,7 @@ public record PlanetilerConfig( boolean deferIndexCreation, boolean optimizeDb, boolean emitTilesInOrder, - boolean forceOverwrite, + boolean force, boolean gzipTempStorage, int sortMaxReaders, int sortMaxWriters, diff --git 
a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java index 7922c2b916..25f6a58734 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java @@ -21,12 +21,15 @@ import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; +import java.nio.file.FileStore; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -47,7 +50,7 @@ * Downloader.create(PlanetilerConfig.defaults()) * .add("natural_earth", "http://url/of/natural_earth.zip", Path.of("natural_earth.zip")) * .add("osm", "http://url/of/file.osm.pbf", Path.of("file.osm.pbf")) - * .start(); + * .run(); * } *
<p>
* As a shortcut to find the URL of a file to download from the Geofabrik @@ -71,6 +74,7 @@ public class Downloader { private final ExecutorService executor; private final Stats stats; private final long chunkSizeBytes; + private final ConcurrentMap bytesToDownload = new ConcurrentHashMap<>(); Downloader(PlanetilerConfig config, Stats stats, long chunkSizeBytes) { this.chunkSizeBytes = chunkSizeBytes; @@ -202,6 +206,7 @@ CompletableFuture downloadIfNecessary(ResourceToDownload resourceToDownload) Path tmpPath = resourceToDownload.tmpPath(); FileUtils.delete(tmpPath); FileUtils.deleteOnExit(tmpPath); + checkDiskSpace(tmpPath, metadata.size); return httpDownload(resourceToDownload, tmpPath) .thenCompose(result -> { try { @@ -223,6 +228,27 @@ CompletableFuture downloadIfNecessary(ResourceToDownload resourceToDownload) }, executor); } + private void checkDiskSpace(Path destination, long size) { + try { + var fs = Files.getFileStore(destination.getParent()); + var totalPendingBytes = bytesToDownload.merge(fs, size, Long::sum); + var availableBytes = fs.getUnallocatedSpace(); + if (totalPendingBytes > availableBytes) { + var format = Format.defaultInstance(); + String warning = + "Attempting to download " + format.storage(totalPendingBytes) + " to " + fs + " which only has " + + format.storage(availableBytes) + " available"; + if (config.force()) { + LOGGER.warn(warning + ", will probably fail."); + } else { + throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway."); + } + } + } catch (IOException e) { + LOGGER.warn("Unable to check file size for download, you may run out of space: " + e, e); + } + } + private CompletableFuture httpHeadFollowRedirects(String url, int redirects) { if (redirects > MAX_REDIRECTS) { throw new IllegalStateException("Exceeded " + redirects + " redirects for " + url); @@ -325,9 +351,9 @@ private HttpRequest.Builder newHttpRequest(String url) { .header(USER_AGENT, config.httpUserAgent()); } - static record 
ResourceMetadata(Optional redirect, String canonicalUrl, long size, boolean acceptRange) {} + record ResourceMetadata(Optional redirect, String canonicalUrl, long size, boolean acceptRange) {} - static record ResourceToDownload( + record ResourceToDownload( String id, String url, Path output, CompletableFuture metadata, AtomicLong progress ) { @@ -347,7 +373,7 @@ public long bytesDownloaded() { /** * Wrapper for a {@link ReadableByteChannel} that captures progress information. */ - private static record ProgressChannel(ReadableByteChannel inner, AtomicLong progress) implements ReadableByteChannel { + private record ProgressChannel(ReadableByteChannel inner, AtomicLong progress) implements ReadableByteChannel { @Override public int read(ByteBuffer dst) throws IOException { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Format.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Format.java index 739f97793d..9f78a7a7f3 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Format.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Format.java @@ -79,6 +79,11 @@ public String storage(Number num, boolean pad) { return format(num, pad, STORAGE_SUFFIXES); } + /** Alias for {@link #storage(Number, boolean)} where {@code pad=false}. */ + public String storage(Number num) { + return storage(num, false); + } + /** Returns a number formatted like "123" "1.2k" "2.5B", etc. 
*/ public String numeric(Number num, boolean pad) { return format(num, pad, NUMERIC_SUFFIXES); diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/DownloaderTest.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/DownloaderTest.java index 031ac3ef2d..ccbeb62bc2 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/util/DownloaderTest.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/util/DownloaderTest.java @@ -1,9 +1,6 @@ package com.onthegomap.planetiler.util; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import com.onthegomap.planetiler.config.PlanetilerConfig; import com.onthegomap.planetiler.stats.Stats; @@ -18,6 +15,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; +import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -122,4 +120,33 @@ public void testDownload(boolean range, int maxLength, int redirects) throws Exc assertEquals(FileUtils.size(path), FileUtils.size(dest)); assertEquals(5, resource4.bytesDownloaded()); } + + @Test + public void testDownloadFailsIfTooBig() { + var downloader = new Downloader(config, stats, 2L) { + + @Override + InputStream openStream(String url) { + throw new AssertionError("Shouldn't get here"); + } + + @Override + InputStream openStreamRange(String url, long start, long end) { + throw new AssertionError("Shouldn't get here"); + } + + @Override + CompletableFuture httpHead(String url) { + return CompletableFuture.completedFuture(new ResourceMetadata(Optional.empty(), url, Long.MAX_VALUE, 
true)); + } + }; + + Path dest = path.resolve("out"); + String url = "http://url"; + + var resource1 = new Downloader.ResourceToDownload("resource", url, dest); + var exception = assertThrows(ExecutionException.class, () -> downloader.downloadIfNecessary(resource1).get()); + assertInstanceOf(IllegalArgumentException.class, exception.getCause()); + assertTrue(exception.getMessage().contains("--force"), exception.getMessage()); + } } From eb44bffc8cef6256be52ab3ab062908a980c553e Mon Sep 17 00:00:00 2001 From: Mike Barry Date: Tue, 8 Feb 2022 08:52:13 -0500 Subject: [PATCH 2/5] add resource warnings --- .../planetiler/basemap/BasemapProfile.java | 20 ++++- .../com/onthegomap/planetiler/Planetiler.java | 87 +++++++++++++++++++ .../com/onthegomap/planetiler/Profile.java | 24 +++++ .../planetiler/collection/LongLongMap.java | 47 ++++++++-- .../planetiler/config/PlanetilerConfig.java | 2 +- .../planetiler/reader/osm/OsmInputFile.java | 11 ++- .../planetiler/PlanetilerTests.java | 57 ++++++++++++ 7 files changed, 239 insertions(+), 9 deletions(-) diff --git a/planetiler-basemap/src/main/java/com/onthegomap/planetiler/basemap/BasemapProfile.java b/planetiler-basemap/src/main/java/com/onthegomap/planetiler/basemap/BasemapProfile.java index a9cc055f50..bcf1107db2 100644 --- a/planetiler-basemap/src/main/java/com/onthegomap/planetiler/basemap/BasemapProfile.java +++ b/planetiler-basemap/src/main/java/com/onthegomap/planetiler/basemap/BasemapProfile.java @@ -182,6 +182,24 @@ public String version() { return OpenMapTilesSchema.VERSION; } + @Override + public long estimateIntermediateDiskBytes(long osmFileSize) { + // in late 2021, a 60gb OSM file used 200GB for intermediate storage + return osmFileSize * 200 / 60; + } + + @Override + public long estimateOutputBytes(long osmFileSize) { + // in late 2021, a 60gb OSM file generated a 100GB output file + return osmFileSize * 100 / 60; + } + + @Override + public long estimateRamRequired(long osmFileSize) { + // 30gb for a 60gb OSM 
file is generally safe, although less might be OK too + return osmFileSize / 2; + } + /** * Layers should implement this interface to subscribe to elements from natural * earth. @@ -244,7 +262,7 @@ public interface OsmAllProcessor { */ public interface IgnoreWikidata {} - private static record RowDispatch( + private record RowDispatch( Tables.Constructor constructor, List> handlers ) {} diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java index c162bd9ac0..e85dcd0347 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java @@ -10,19 +10,26 @@ import com.onthegomap.planetiler.reader.ShapefileReader; import com.onthegomap.planetiler.reader.osm.OsmInputFile; import com.onthegomap.planetiler.reader.osm.OsmReader; +import com.onthegomap.planetiler.stats.ProcessInfo; import com.onthegomap.planetiler.stats.Stats; import com.onthegomap.planetiler.stats.Timers; import com.onthegomap.planetiler.util.Downloader; import com.onthegomap.planetiler.util.FileUtils; +import com.onthegomap.planetiler.util.Format; import com.onthegomap.planetiler.util.Geofabrik; import com.onthegomap.planetiler.util.LogUtil; import com.onthegomap.planetiler.util.Translations; import com.onthegomap.planetiler.util.Wikidata; import com.onthegomap.planetiler.worker.RunnableThatThrows; +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.nio.file.FileStore; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -475,6 +482,9 @@ public void run() throws Exception { download(); } ensureInputFilesExist(); + Files.createDirectories(tmpDir); + checkDiskSpace(); + 
checkMemory(); if (onlyDownloadSources) { return; // exit only if just downloading } @@ -516,6 +526,83 @@ public void run() throws Exception { stats.close(); } + private void checkDiskSpace() { + Map bytesRequested = new HashMap<>(); + long osmSize = osmInputFile.diskUsageBytes(); + long nodeMapSize = LongLongMap.estimateDiskUsage(config.nodeMapType(), config.nodeMapStorage(), osmSize); + long featureSize = profile.estimateIntermediateDiskBytes(osmSize); + long outputSize = profile.estimateOutputBytes(osmSize); + + try { + bytesRequested.merge(Files.getFileStore(tmpDir), nodeMapSize, Long::sum); + bytesRequested.merge(Files.getFileStore(tmpDir), featureSize, Long::sum); + bytesRequested.merge(Files.getFileStore(output.getParent()), outputSize, Long::sum); + for (var entry : bytesRequested.entrySet()) { + var fs = entry.getKey(); + var requested = entry.getValue(); + long available = fs.getUnallocatedSpace(); + if (available < requested) { + var format = Format.defaultInstance(); + String warning = + "Planetiler needs ~" + format.storage(requested) + " on " + fs + " which only has " + + format.storage(available) + " available"; + if (config.force() || requested < available * 1.25) { + LOGGER.warn(warning + ", may fail."); + } else { + throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway."); + } + } + } + } catch (IOException e) { + LOGGER.warn("Unable to check disk space requirements, may run out of room " + e); + } + + } + + private void checkMemory() { + var format = Format.defaultInstance(); + long nodeMap = LongLongMap.estimateMemoryUsage(config.nodeMapType(), config.nodeMapStorage(), + osmInputFile.diskUsageBytes()); + long profile = profile().estimateRamRequired(osmInputFile.diskUsageBytes()); + long requested = nodeMap + profile; + long jvmMemory = ProcessInfo.getMaxMemoryBytes(); + + if (jvmMemory < requested) { + String warning = + "Planetiler needs ~" + format.storage(requested) + " memory for the JVM, but only " + + 
format.storage(jvmMemory) + " is available"; + if (config.force() || requested < jvmMemory * 1.25) { + LOGGER.warn(warning + ", may fail."); + } else { + throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway."); + } + } + + long nodeMapBytes = LongLongMap.estimateDiskUsage(config.nodeMapType(), config.nodeMapStorage(), + osmInputFile.diskUsageBytes()); + if (nodeMapBytes > 0 + && ManagementFactory.getOperatingSystemMXBean() instanceof com.sun.management.OperatingSystemMXBean os) { + long systemMemory = os.getTotalMemorySize(); + long availableForDiskCache = systemMemory - jvmMemory; + if (nodeMapBytes > availableForDiskCache) { + LOGGER.warn( + """ + Planetiler will store node locations in a %s memory-mapped file. It is recommended to have at least that + much free RAM available on the system for the OS to cache the memory-mapped file, or else the import may + slow down substantially. There is %s total memory available and the JVM will use %s which only leaves %s. + You may want to reduce the -Xmx JVM setting, run on a system with more RAM, or increase -Xmx to at least + %s and use --nodemap-storage=ram instead. 
+ """.formatted( + format.storage(nodeMapBytes), + format.storage(systemMemory), + format.storage(jvmMemory), + format.storage(systemMemory - jvmMemory), + format.storage(jvmMemory + nodeMapBytes) + )); + } + } + } + public Arguments arguments() { return arguments; } diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Profile.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/Profile.java index eba0cf59d8..c3060db43d 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Profile.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Profile.java @@ -194,6 +194,30 @@ default boolean caresAboutSource(String name) { return true; } + /** + * Returns an estimate for how many bytes of disk this profile will use for intermediate feature storage to warn when + * running with insufficient disk space. + */ + default long estimateIntermediateDiskBytes(long osmFileSize) { + return 0L; + } + + /** + * Returns an estimate for how many bytes the output file will be to warn when running with insufficient disk space. + */ + default long estimateOutputBytes(long osmFileSize) { + return 0L; + } + + /** + * Returns an estimate for how many bytes of RAM this will use to warn when running with insufficient memory. + *
<p>
+ * This should include memory for things the profile stores in memory, as well as relations and multipolygons. + */ + default long estimateRamRequired(long osmFileSize) { + return 0L; + } + /** * A default implementation of {@link Profile} that emits no output elements. */ diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongLongMap.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongLongMap.java index 17d005e219..c2e621913a 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongLongMap.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/collection/LongLongMap.java @@ -44,11 +44,7 @@ public interface LongLongMap extends Closeable, MemoryEstimator.HasEstimate, Dis * @throws IllegalArgumentException if {@code name} or {@code storage} is not valid */ static LongLongMap from(String name, String storage, Path path) { - boolean ram = switch (storage) { - case "ram" -> true; - case "mmap" -> false; - default -> throw new IllegalArgumentException("Unexpected storage value: " + storage); - }; + boolean ram = isRam(storage); return switch (name) { case "noop" -> noop(); @@ -58,6 +54,47 @@ static LongLongMap from(String name, String storage, Path path) { }; } + /** Estimates the number of bytes of RAM this nodemap will use for a given OSM input file. */ + static long estimateMemoryUsage(String name, String storage, long osmFileSize) { + boolean ram = isRam(storage); + long nodes = estimateNumNodes(osmFileSize); + + return switch (name) { + case "noop" -> 0; + case "sortedtable" -> 300_000_000L + (ram ? 12 * nodes : 0L); + case "sparsearray" -> 300_000_000L + (ram ? 9 * nodes : 0L); + default -> throw new IllegalArgumentException("Unexpected value: " + name); + }; + } + + /** Estimates the number of bytes of disk this nodemap will use for a given OSM input file. 
*/ + static long estimateDiskUsage(String name, String storage, long osmFileSize) { + if (isRam(storage)) { + return 0; + } else { + long nodes = estimateNumNodes(osmFileSize); + return switch (name) { + case "noop" -> 0; + case "sortedtable" -> 12 * nodes; + case "sparsearray" -> 9 * nodes; + default -> throw new IllegalArgumentException("Unexpected value: " + name); + }; + } + } + + private static boolean isRam(String storage) { + return switch (storage) { + case "ram" -> true; + case "mmap" -> false; + default -> throw new IllegalArgumentException("Unexpected storage value: " + storage); + }; + } + + private static long estimateNumNodes(long osmFileSize) { + // In February 2022, planet.pbf was 62GB with 750m nodes, so scale from there + return Math.round(750_000_000d * (osmFileSize / 62_000_000_000d)); + } + /** Returns a longlong map that stores no data and throws on read */ static LongLongMap noop() { return new LongLongMap() { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java index de8cb3e290..d83728b126 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/config/PlanetilerConfig.java @@ -61,7 +61,7 @@ public static PlanetilerConfig from(Arguments arguments) { arguments.getBoolean("defer_mbtiles_index_creation", "skip adding index to mbtiles file", false), arguments.getBoolean("optimize_db", "optimize mbtiles after writing", false), arguments.getBoolean("emit_tiles_in_order", "emit tiles in index order", true), - arguments.getBoolean("force", "force overwriting output file", false), + arguments.getBoolean("force", "overwriting output file and ignore disk/RAM warnings", false), arguments.getBoolean("gzip_temp", "gzip temporary feature storage (uses more CPU, but less disk space)", false), 
arguments.getInteger("sort_max_readers", "maximum number of concurrent read threads to use when sorting chunks", 6), diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/osm/OsmInputFile.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/osm/OsmInputFile.java index e0dec106b9..bbf767b3d9 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/osm/OsmInputFile.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/reader/osm/OsmInputFile.java @@ -6,6 +6,8 @@ import com.graphhopper.reader.osm.pbf.PbfStreamSplitter; import com.graphhopper.reader.osm.pbf.Sink; import com.onthegomap.planetiler.config.Bounds; +import com.onthegomap.planetiler.util.DiskBacked; +import com.onthegomap.planetiler.util.FileUtils; import com.onthegomap.planetiler.worker.WorkerPipeline; import java.io.BufferedInputStream; import java.io.DataInputStream; @@ -29,7 +31,7 @@ * * @see OSM PBF Format */ -public class OsmInputFile implements Bounds.Provider, OsmSource { +public class OsmInputFile implements Bounds.Provider, OsmSource, DiskBacked { private final Path path; @@ -115,7 +117,12 @@ public WorkerPipeline.SourceStep read(String poolName, int thread return next -> readTo(next, poolName, threads); } - private static record ReaderElementSink(Consumer queue) implements Sink { + @Override + public long diskUsageBytes() { + return FileUtils.size(path); + } + + private record ReaderElementSink(Consumer queue) implements Sink { @Override public void process(ReaderElement readerElement) { diff --git a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java index 8cf3c8307c..dae1ca3cd3 100644 --- a/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java +++ b/planetiler-core/src/test/java/com/onthegomap/planetiler/PlanetilerTests.java @@ -1608,6 +1608,63 @@ public void processFeature(SourceFeature source, 
FeatureCollector features) { } } + private void runWithProfile(Path tempDir, Profile profile, boolean force) throws Exception { + Planetiler.create(Arguments.of("tmpdir", tempDir, "force", Boolean.toString(force))) + .setProfile(profile) + .addOsmSource("osm", TestUtils.pathToResource("monaco-latest.osm.pbf")) + .addNaturalEarthSource("ne", TestUtils.pathToResource("natural_earth_vector.sqlite")) + .addShapefileSource("shapefile", TestUtils.pathToResource("shapefile.zip")) + .setOutput("mbtiles", tempDir.resolve("output.mbtiles")) + .run(); + } + + @Test + public void testPlanetilerMemoryCheck(@TempDir Path tempDir) { + assertThrows(Exception.class, () -> runWithProfile(tempDir, new Profile.NullProfile() { + @Override + public long estimateIntermediateDiskBytes(long osmSize) { + return Long.MAX_VALUE / 10L; + } + }, false) + ); + assertThrows(Exception.class, () -> runWithProfile(tempDir, new Profile.NullProfile() { + @Override + public long estimateOutputBytes(long osmSize) { + return Long.MAX_VALUE / 10L; + } + }, false) + ); + assertThrows(Exception.class, () -> runWithProfile(tempDir, new Profile.NullProfile() { + @Override + public long estimateRamRequired(long osmSize) { + return Long.MAX_VALUE / 10L; + } + }, false) + ); + } + + @Test + public void testPlanetilerMemoryCheckForce(@TempDir Path tempDir) throws Exception { + runWithProfile(tempDir, new Profile.NullProfile() { + @Override + public long estimateIntermediateDiskBytes(long osmSize) { + return Long.MAX_VALUE / 10L; + } + }, true); + runWithProfile(tempDir, new Profile.NullProfile() { + @Override + public long estimateOutputBytes(long osmSize) { + return Long.MAX_VALUE / 10L; + } + }, true); + runWithProfile(tempDir, new Profile.NullProfile() { + @Override + public long estimateRamRequired(long osmSize) { + return Long.MAX_VALUE / 10L; + } + }, true); + } + @Test public void testHandleProfileException() throws Exception { var results = runWithOsmElements( From 923eae2f5ef47b57febf7eed06d5dd8008a58dcd 
Mon Sep 17 00:00:00 2001 From: Mike Barry Date: Tue, 15 Feb 2022 06:19:18 -0500 Subject: [PATCH 3/5] fix some resource warnings --- .../com/onthegomap/planetiler/Planetiler.java | 31 +++---------------- .../planetiler/util/Downloader.java | 2 +- .../onthegomap/planetiler/util/FileUtils.java | 5 ++- 3 files changed, 9 insertions(+), 29 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java index e85dcd0347..b361cdf5cf 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java @@ -22,13 +22,13 @@ import com.onthegomap.planetiler.util.Wikidata; import com.onthegomap.planetiler.worker.RunnableThatThrows; import java.io.IOException; -import java.lang.management.ManagementFactory; import java.nio.file.FileStore; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.function.Function; import org.slf4j.Logger; @@ -536,7 +536,7 @@ private void checkDiskSpace() { try { bytesRequested.merge(Files.getFileStore(tmpDir), nodeMapSize, Long::sum); bytesRequested.merge(Files.getFileStore(tmpDir), featureSize, Long::sum); - bytesRequested.merge(Files.getFileStore(output.getParent()), outputSize, Long::sum); + bytesRequested.merge(Files.getFileStore(output.toAbsolutePath().getParent()), outputSize, Long::sum); for (var entry : bytesRequested.entrySet()) { var fs = entry.getKey(); var requested = entry.getValue(); @@ -570,37 +570,14 @@ private void checkMemory() { if (jvmMemory < requested) { String warning = "Planetiler needs ~" + format.storage(requested) + " memory for the JVM, but only " - + format.storage(jvmMemory) + " is available"; + + format.storage(jvmMemory) + " is available, try setting -Xmx=" + 
format.storage(requested).toLowerCase( + Locale.ROOT); if (config.force() || requested < jvmMemory * 1.25) { LOGGER.warn(warning + ", may fail."); } else { throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway."); } } - - long nodeMapBytes = LongLongMap.estimateDiskUsage(config.nodeMapType(), config.nodeMapStorage(), - osmInputFile.diskUsageBytes()); - if (nodeMapBytes > 0 - && ManagementFactory.getOperatingSystemMXBean() instanceof com.sun.management.OperatingSystemMXBean os) { - long systemMemory = os.getTotalMemorySize(); - long availableForDiskCache = systemMemory - jvmMemory; - if (nodeMapBytes > availableForDiskCache) { - LOGGER.warn( - """ - Planetiler will store node locations in a %s memory-mapped file. It is recommended to have at least that - much free RAM available on the system for the OS to cache the memory-mapped file, or else the import may - slow down substantially. There is %s total memory available and the JVM will use %s which only leaves %s. - You may want to reduce the -Xmx JVM setting, run on a system with more RAM, or increase -Xmx to at least - %s and use --nodemap-storage=ram instead. 
- """.formatted( - format.storage(nodeMapBytes), - format.storage(systemMemory), - format.storage(jvmMemory), - format.storage(systemMemory - jvmMemory), - format.storage(jvmMemory + nodeMapBytes) - )); - } - } } public Arguments arguments() { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java index 25f6a58734..eac6bdae58 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java @@ -230,7 +230,7 @@ CompletableFuture downloadIfNecessary(ResourceToDownload resourceToDownload) private void checkDiskSpace(Path destination, long size) { try { - var fs = Files.getFileStore(destination.getParent()); + var fs = Files.getFileStore(destination.toAbsolutePath().getParent()); var totalPendingBytes = bytesToDownload.merge(fs, size, Long::sum); var availableBytes = fs.getUnallocatedSpace(); if (totalPendingBytes > availableBytes) { diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java index ba4e4fc756..0b6906d8c9 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/util/FileUtils.java @@ -133,7 +133,10 @@ public static void createParentDirectories(Path path) { if (Files.isDirectory(path)) { Files.createDirectories(path); } else { - Files.createDirectories(path.getParent()); + Path parent = path.getParent(); + if (parent != null) { + Files.createDirectories(parent); + } } } catch (IOException e) { throw new IllegalStateException("Unable to create parent directories " + path, e); From 2fbbba8bfbcef95bf8235e875aac7fc720f3ab72 Mon Sep 17 00:00:00 2001 From: Mike Barry Date: Tue, 1 Mar 2022 09:19:01 -0500 Subject: [PATCH 4/5] add option to free 
disk space after reading a source --- .../com/onthegomap/planetiler/Planetiler.java | 67 +++++++++++++------ 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java index 4e563b1faf..0d7e58e9b3 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java @@ -513,8 +513,13 @@ public void run() throws Exception { stage.task.run(); } - LOGGER.info("Deleting node.db to make room for mbtiles"); + LOGGER.info("Deleting node.db to make room for output file"); profile.release(); + for (var inputPath : inputPaths) { + if (inputPath.freeAfterReading()) { + LOGGER.info("Deleting " + inputPath.id + "(" + inputPath.path + ") to make room for output file"); + } + } featureGroup.prepare(); @@ -527,38 +532,56 @@ public void run() throws Exception { } private void checkDiskSpace() { - Map bytesRequested = new HashMap<>(); + Map readPhaseBytes = new HashMap<>(); + Map writePhaseBytes = new HashMap<>(); long osmSize = osmInputFile.diskUsageBytes(); long nodeMapSize = LongLongMap.estimateDiskUsage(config.nodeMapType(), config.nodeMapStorage(), osmSize); long featureSize = profile.estimateIntermediateDiskBytes(osmSize); long outputSize = profile.estimateOutputBytes(osmSize); try { - bytesRequested.merge(Files.getFileStore(tmpDir), nodeMapSize, Long::sum); - bytesRequested.merge(Files.getFileStore(tmpDir), featureSize, Long::sum); - bytesRequested.merge(Files.getFileStore(output.toAbsolutePath().getParent()), outputSize, Long::sum); - for (var entry : bytesRequested.entrySet()) { - var fs = entry.getKey(); - var requested = entry.getValue(); - long available = fs.getUnallocatedSpace(); - if (available < requested) { - var format = Format.defaultInstance(); - String warning = - "Planetiler needs ~" + format.storage(requested) + " on " + 
fs + " which only has " - + format.storage(available) + " available"; - if (config.force() || requested < available * 1.25) { - LOGGER.warn(warning + ", may fail."); - } else { - throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway."); - } + // node locations only needed while reading inputs + readPhaseBytes.merge(Files.getFileStore(tmpDir), nodeMapSize, Long::sum); + // feature db persists across read/write phase + readPhaseBytes.merge(Files.getFileStore(tmpDir), featureSize, Long::sum); + writePhaseBytes.merge(Files.getFileStore(tmpDir), featureSize, Long::sum); + // output only needed during write phase + writePhaseBytes.merge(Files.getFileStore(output.toAbsolutePath().getParent()), outputSize, Long::sum); + // if the user opts to remove an input source after reading to free up additional space for the output... + for (var input : inputPaths) { + if (input.freeAfterReading()) { + writePhaseBytes.merge(Files.getFileStore(input.path), -Files.size(input.path), Long::sum); } } + + checkDiskSpaceOnDevices(readPhaseBytes, "read"); + checkDiskSpaceOnDevices(writePhaseBytes, "write"); } catch (IOException e) { LOGGER.warn("Unable to check disk space requirements, may run out of room " + e); } } + private void checkDiskSpaceOnDevices(Map readPhaseBytes, String phase) throws IOException { + for (var entry : readPhaseBytes.entrySet()) { + var fs = entry.getKey(); + var requested = entry.getValue(); + long available = fs.getUnallocatedSpace(); + if (available < requested) { + var format = Format.defaultInstance(); + String warning = + "Planetiler needs ~" + format.storage(requested) + " on " + fs + " during " + phase + + " phase, which only has " + + format.storage(available) + " available"; + if (config.force() || requested < available * 1.25) { + LOGGER.warn(warning + ", may fail."); + } else { + throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway."); + } + } + } + } + private void 
checkMemory() { var format = Format.defaultInstance(); long nodeMap = LongLongMap.estimateMemoryUsage(config.nodeMapType(), config.nodeMapStorage(), @@ -617,13 +640,15 @@ private RunnableThatThrows ifSourceUsed(String name, RunnableThatThrows task) { private Path getPath(String name, String type, Path defaultPath, String defaultUrl) { Path path = arguments.file(name + "_path", name + " " + type + " path", defaultPath); + boolean freeAfterReading = arguments.getBoolean(name + "_free_after_read", + name + " delete after reading to make space for output (reduces peak disk usage)", false); if (downloadSources) { String url = arguments.getString(name + "_url", name + " " + type + " url", defaultUrl); if (!Files.exists(path) && url != null) { toDownload.add(new ToDownload(name, url, path)); } } - inputPaths.add(new InputPath(name, path)); + inputPaths.add(new InputPath(name, path, freeAfterReading)); return path; } @@ -656,5 +681,5 @@ private record Stage(String id, List details, RunnableThatThrows task) { private record ToDownload(String id, String url, Path path) {} - private record InputPath(String id, Path path) {} + private record InputPath(String id, Path path, boolean freeAfterReading) {} } From c1118f4beb4bc4a26a8933913ee3820a52956031 Mon Sep 17 00:00:00 2001 From: Mike Barry Date: Thu, 3 Mar 2022 07:16:19 -0500 Subject: [PATCH 5/5] change free param and fix only-wikidata/only-download logic --- config-example.properties | 4 ++++ .../java/com/onthegomap/planetiler/Planetiler.java | 11 ++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/config-example.properties b/config-example.properties index 3e681e9731..389c36070f 100644 --- a/config-example.properties +++ b/config-example.properties @@ -119,6 +119,10 @@ # osm_path=path/to/monaco.osm.pbf # osm_url=https://url/for/monaco.osm.pbf +# To delete an input file before writing the output file (and reduce peak disk requirements): +# free_osm_after_read: true +# free_natural_earth_after_read: true 
+ #### Layer-specific overrides: #### "boundary" layer diff --git a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java index 0d7e58e9b3..dbb0c07f03 100644 --- a/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java +++ b/planetiler-core/src/main/java/com/onthegomap/planetiler/Planetiler.java @@ -485,17 +485,14 @@ public void run() throws Exception { Files.createDirectories(tmpDir); checkDiskSpace(); checkMemory(); - if (onlyDownloadSources) { - return; // exit only if just downloading - } if (fetchWikidata) { Wikidata.fetch(osmInputFile(), wikidataNamesFile, config(), profile(), stats()); } if (useWikidata) { translations().addTranslationProvider(Wikidata.load(wikidataNamesFile)); } - if (onlyFetchWikidata) { - return; // exit only if just fetching wikidata + if (onlyDownloadSources || onlyFetchWikidata) { + return; // exit only if just fetching wikidata or downloading sources } if (osmInputFile != null) { config.bounds().setFallbackProvider(osmInputFile); @@ -640,8 +637,8 @@ private RunnableThatThrows ifSourceUsed(String name, RunnableThatThrows task) { private Path getPath(String name, String type, Path defaultPath, String defaultUrl) { Path path = arguments.file(name + "_path", name + " " + type + " path", defaultPath); - boolean freeAfterReading = arguments.getBoolean(name + "_free_after_read", - name + " delete after reading to make space for output (reduces peak disk usage)", false); + boolean freeAfterReading = arguments.getBoolean("free_" + name + "_after_read", + "delete " + name + " input file after reading to make space for output (reduces peak disk usage)", false); if (downloadSources) { String url = arguments.getString(name + "_url", name + " " + type + " url", defaultUrl); if (!Files.exists(path) && url != null) {