Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Warn when running with insufficient memory or disk space #73

Merged
merged 7 commits into from
Mar 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions config-example.properties
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@
# osm_path=path/to/monaco.osm.pbf
# osm_url=https://url/for/monaco.osm.pbf

# To delete an input file before writing the output file (and reduce peak disk requirements):
# free_osm_after_read=true
# free_natural_earth_after_read=true

#### Layer-specific overrides:

#### "boundary" layer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,24 @@ public String version() {
return OpenMapTilesSchema.VERSION;
}

@Override
public long estimateIntermediateDiskBytes(long osmFileSize) {
  // Observed in late 2021: a 60GB OSM extract needed roughly 200GB of intermediate storage,
  // so scale that ratio linearly with the input size (multiply before dividing to keep precision).
  long observedOsmGb = 60;
  long observedIntermediateGb = 200;
  return osmFileSize * observedIntermediateGb / observedOsmGb;
}

@Override
public long estimateOutputBytes(long osmFileSize) {
  // Observed in late 2021: a 60GB OSM extract produced about a 100GB output file,
  // so scale that ratio linearly with the input size (multiply before dividing to keep precision).
  long observedOsmGb = 60;
  long observedOutputGb = 100;
  return osmFileSize * observedOutputGb / observedOsmGb;
}

@Override
public long estimateRamRequired(long osmFileSize) {
  // Half the input size: 30gb for a 60gb OSM file is generally safe, although less might be OK too
  return osmFileSize / 2;
}

/**
* Layers should implement this interface to subscribe to elements from <a href="https://www.naturalearthdata.com/">natural
* earth</a>.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,26 @@
import com.onthegomap.planetiler.reader.ShapefileReader;
import com.onthegomap.planetiler.reader.osm.OsmInputFile;
import com.onthegomap.planetiler.reader.osm.OsmReader;
import com.onthegomap.planetiler.stats.ProcessInfo;
import com.onthegomap.planetiler.stats.Stats;
import com.onthegomap.planetiler.stats.Timers;
import com.onthegomap.planetiler.util.Downloader;
import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.util.Format;
import com.onthegomap.planetiler.util.Geofabrik;
import com.onthegomap.planetiler.util.LogUtil;
import com.onthegomap.planetiler.util.Translations;
import com.onthegomap.planetiler.util.Wikidata;
import com.onthegomap.planetiler.worker.RunnableThatThrows;
import java.io.IOException;
import java.nio.file.FileStore;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -444,7 +451,7 @@ public void run() throws Exception {
System.exit(0);
} else if (onlyDownloadSources) {
// don't check files if not generating map
} else if (overwrite || config.forceOverwrite()) {
} else if (overwrite || config.force()) {
FileUtils.deleteFile(output);
} else if (Files.exists(output)) {
throw new IllegalArgumentException(output + " already exists, use the --force argument to overwrite.");
Expand Down Expand Up @@ -475,17 +482,17 @@ public void run() throws Exception {
download();
}
ensureInputFilesExist();
if (onlyDownloadSources) {
return; // exit only if just downloading
}
Files.createDirectories(tmpDir);
checkDiskSpace();
checkMemory();
if (fetchWikidata) {
Wikidata.fetch(osmInputFile(), wikidataNamesFile, config(), profile(), stats());
}
if (useWikidata) {
translations().addTranslationProvider(Wikidata.load(wikidataNamesFile));
}
if (onlyFetchWikidata) {
return; // exit only if just fetching wikidata
if (onlyDownloadSources || onlyFetchWikidata) {
return; // exit only if just fetching wikidata or downloading sources
}
if (osmInputFile != null) {
config.bounds().setFallbackProvider(osmInputFile);
Expand All @@ -503,8 +510,13 @@ public void run() throws Exception {
stage.task.run();
}

LOGGER.info("Deleting node.db to make room for mbtiles");
LOGGER.info("Deleting node.db to make room for output file");
profile.release();
for (var inputPath : inputPaths) {
if (inputPath.freeAfterReading()) {
LOGGER.info("Deleting " + inputPath.id + "(" + inputPath.path + ") to make room for output file");
}
}

featureGroup.prepare();

Expand All @@ -516,6 +528,78 @@ public void run() throws Exception {
stats.close();
}

/**
 * Estimates how many bytes each {@link FileStore} will need during the read and write phases and
 * warns (or fails without {@code --force}) when a store does not have enough free space.
 * <p>
 * Best-effort only: if the file stores cannot be queried, logs a warning and continues.
 */
private void checkDiskSpace() {
  Map<FileStore, Long> readPhaseBytes = new HashMap<>();
  Map<FileStore, Long> writePhaseBytes = new HashMap<>();
  long osmSize = osmInputFile.diskUsageBytes();
  long nodeMapSize = LongLongMap.estimateDiskUsage(config.nodeMapType(), config.nodeMapStorage(), osmSize);
  long featureSize = profile.estimateIntermediateDiskBytes(osmSize);
  long outputSize = profile.estimateOutputBytes(osmSize);

  try {
    FileStore tmpDirStore = Files.getFileStore(tmpDir);
    // node locations only needed while reading inputs
    readPhaseBytes.merge(tmpDirStore, nodeMapSize, Long::sum);
    // feature db persists across read/write phase
    readPhaseBytes.merge(tmpDirStore, featureSize, Long::sum);
    writePhaseBytes.merge(tmpDirStore, featureSize, Long::sum);
    // output only needed during write phase
    writePhaseBytes.merge(Files.getFileStore(output.toAbsolutePath().getParent()), outputSize, Long::sum);
    // if the user opts to remove an input source after reading to free up additional space for the output,
    // credit that space back (negative entry) on the store holding the input
    for (var input : inputPaths) {
      if (input.freeAfterReading()) {
        writePhaseBytes.merge(Files.getFileStore(input.path), -Files.size(input.path), Long::sum);
      }
    }

    checkDiskSpaceOnDevices(readPhaseBytes, "read");
    checkDiskSpaceOnDevices(writePhaseBytes, "write");
  } catch (IOException e) {
    // pass the exception as the last argument so SLF4J logs the stack trace instead of just its toString()
    LOGGER.warn("Unable to check disk space requirements, may run out of room", e);
  }
}

/**
 * Checks that each {@link FileStore} has enough unallocated space for the bytes estimated against it.
 * <p>
 * Renamed the parameter from {@code readPhaseBytes}: this method is invoked for both the read and the
 * write phase, so the old name was misleading.
 *
 * @param bytesByFileStore estimated bytes that will be consumed on each file store during {@code phase}
 * @param phase            human-readable phase name ("read" or "write") used in the warning message
 * @throws IOException              if the unallocated space of a file store cannot be queried
 * @throws IllegalArgumentException if space is insufficient by more than 25% and {@code --force} was not set
 */
private void checkDiskSpaceOnDevices(Map<FileStore, Long> bytesByFileStore, String phase) throws IOException {
  for (var entry : bytesByFileStore.entrySet()) {
    var fileStore = entry.getKey();
    var requested = entry.getValue();
    long available = fileStore.getUnallocatedSpace();
    if (available < requested) {
      var format = Format.defaultInstance();
      String warning =
        "Planetiler needs ~" + format.storage(requested) + " on " + fileStore + " during " + phase
          + " phase, which only has "
          + format.storage(available) + " available";
      // within 25% of the available space (or --force set): warn but continue; otherwise fail fast
      if (config.force() || requested < available * 1.25) {
        LOGGER.warn(warning + ", may fail.");
      } else {
        throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway.");
      }
    }
  }
}

/**
 * Estimates how much RAM the node map and profile will need and warns (or fails without
 * {@code --force}) when the JVM max heap is smaller than that estimate.
 */
private void checkMemory() {
  var format = Format.defaultInstance();
  // read the input file size once instead of querying the filesystem twice
  long osmSize = osmInputFile.diskUsageBytes();
  long nodeMapRam = LongLongMap.estimateMemoryUsage(config.nodeMapType(), config.nodeMapStorage(), osmSize);
  // named profileRam (not "profile") to avoid shadowing the enclosing class's profile field
  long profileRam = profile().estimateRamRequired(osmSize);
  long requested = nodeMapRam + profileRam;
  long jvmMemory = ProcessInfo.getMaxMemoryBytes();

  if (jvmMemory < requested) {
    // JVM heap flag syntax is "-Xmx10g" with no '=' sign, so don't emit "-Xmx="
    String warning =
      "Planetiler needs ~" + format.storage(requested) + " memory for the JVM, but only "
        + format.storage(jvmMemory) + " is available, try setting -Xmx" + format.storage(requested).toLowerCase(
          Locale.ROOT);
    // within 25% of the JVM limit (or --force set): warn but continue; otherwise fail fast
    if (config.force() || requested < jvmMemory * 1.25) {
      LOGGER.warn(warning + ", may fail.");
    } else {
      throw new IllegalArgumentException(warning + ", use the --force argument to continue anyway.");
    }
  }
}

public Arguments arguments() {
return arguments;
}
Expand Down Expand Up @@ -553,13 +637,15 @@ private RunnableThatThrows ifSourceUsed(String name, RunnableThatThrows task) {

/**
 * Registers the CLI arguments for the input source {@code name}, queues it for download when
 * downloading is enabled and the file is missing, and records it in {@link #inputPaths}.
 */
private Path getPath(String name, String type, Path defaultPath, String defaultUrl) {
  Path sourcePath = arguments.file(name + "_path", name + " " + type + " path", defaultPath);
  boolean freeAfterReading = arguments.getBoolean("free_" + name + "_after_read",
    "delete " + name + " input file after reading to make space for output (reduces peak disk usage)", false);
  if (downloadSources) {
    // always register the url argument when downloading, even if the file already exists
    String url = arguments.getString(name + "_url", name + " " + type + " url", defaultUrl);
    boolean missingLocally = !Files.exists(sourcePath);
    if (missingLocally && url != null) {
      toDownload.add(new ToDownload(name, url, sourcePath));
    }
  }
  inputPaths.add(new InputPath(name, sourcePath, freeAfterReading));
  return sourcePath;
}

Expand Down Expand Up @@ -592,5 +678,5 @@ private record Stage(String id, List<String> details, RunnableThatThrows task) {

private record ToDownload(String id, String url, Path path) {}

private record InputPath(String id, Path path) {}
private record InputPath(String id, Path path, boolean freeAfterReading) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,30 @@ default boolean caresAboutSource(String name) {
return true;
}

/**
 * Returns an estimate for how many bytes of disk this profile will use for intermediate feature storage to warn when
 * running with insufficient disk space.
 *
 * @param osmFileSize size of the input OSM .pbf file in bytes
 * @return estimated intermediate disk usage in bytes; 0 (the default) means no estimate is available
 */
default long estimateIntermediateDiskBytes(long osmFileSize) {
  return 0L;
}

/**
 * Returns an estimate for how many bytes the output file will be to warn when running with insufficient disk space.
 *
 * @param osmFileSize size of the input OSM .pbf file in bytes
 * @return estimated output file size in bytes; 0 (the default) means no estimate is available
 */
default long estimateOutputBytes(long osmFileSize) {
  return 0L;
}

/**
 * Returns an estimate for how many bytes of RAM this will use to warn when running with insufficient memory.
 * <p>
 * This should include memory for things the profile stores in memory, as well as relations and multipolygons.
 *
 * @param osmFileSize size of the input OSM .pbf file in bytes
 * @return estimated RAM usage in bytes; 0 (the default) means no estimate is available
 */
default long estimateRamRequired(long osmFileSize) {
  return 0L;
}

/**
* A default implementation of {@link Profile} that emits no output elements.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,7 @@ public interface LongLongMap extends Closeable, MemoryEstimator.HasEstimate, Dis
* @throws IllegalArgumentException if {@code name} or {@code storage} is not valid
*/
static LongLongMap from(String name, String storage, Path path) {
boolean ram = switch (storage) {
case "ram" -> true;
case "mmap" -> false;
default -> throw new IllegalArgumentException("Unexpected storage value: " + storage);
};
boolean ram = isRam(storage);

return switch (name) {
case "noop" -> noop();
Expand All @@ -58,6 +54,47 @@ static LongLongMap from(String name, String storage, Path path) {
};
}

/**
 * Estimates the number of bytes of RAM this nodemap will use for a given OSM input file.
 *
 * @param name        nodemap implementation ("noop", "sortedtable", or "sparsearray")
 * @param storage     where entries live ("ram" or "mmap")
 * @param osmFileSize size of the input OSM .pbf file in bytes
 * @throws IllegalArgumentException if {@code name} or {@code storage} is not a recognized value
 */
static long estimateMemoryUsage(String name, String storage, long osmFileSize) {
  boolean inMemory = isRam(storage);
  long nodes = estimateNumNodes(osmFileSize);
  // flat 300MB allowance for each real implementation, plus per-node bytes only when held in RAM
  long fixedAllowance = 300_000_000L;

  return switch (name) {
    case "noop" -> 0L;
    case "sortedtable" -> fixedAllowance + (inMemory ? nodes * 12 : 0L);
    case "sparsearray" -> fixedAllowance + (inMemory ? nodes * 9 : 0L);
    default -> throw new IllegalArgumentException("Unexpected value: " + name);
  };
}

/**
 * Estimates the number of bytes of disk this nodemap will use for a given OSM input file.
 *
 * @param name        nodemap implementation ("noop", "sortedtable", or "sparsearray")
 * @param storage     where entries live ("ram" or "mmap")
 * @param osmFileSize size of the input OSM .pbf file in bytes
 * @throws IllegalArgumentException if {@code name} or {@code storage} is not a recognized value
 */
static long estimateDiskUsage(String name, String storage, long osmFileSize) {
  // RAM-backed storage writes nothing to disk
  if (isRam(storage)) {
    return 0;
  }
  long bytesPerNode = switch (name) {
    case "noop" -> 0;
    case "sortedtable" -> 12;
    case "sparsearray" -> 9;
    default -> throw new IllegalArgumentException("Unexpected value: " + name);
  };
  return bytesPerNode * estimateNumNodes(osmFileSize);
}

/** Returns true for "ram" storage, false for "mmap", and rejects anything else. */
private static boolean isRam(String storage) {
  // storage.equals(...) (not a constant-first compare) so a null argument still
  // throws NullPointerException, matching the original switch-on-String behavior
  if (storage.equals("ram")) {
    return true;
  }
  if (storage.equals("mmap")) {
    return false;
  }
  throw new IllegalArgumentException("Unexpected storage value: " + storage);
}

/** Extrapolates the node count for an OSM input file of {@code osmFileSize} bytes. */
private static long estimateNumNodes(long osmFileSize) {
  // In February 2022, planet.pbf was 62GB with 750m nodes, so scale from there
  // (linear extrapolation in double precision, rounded to the nearest long)
  return Math.round(750_000_000d * (osmFileSize / 62_000_000_000d));
}

/** Returns a longlong map that stores no data and throws on read */
static LongLongMap noop() {
return new LongLongMap() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public record PlanetilerConfig(
boolean deferIndexCreation,
boolean optimizeDb,
boolean emitTilesInOrder,
boolean forceOverwrite,
boolean force,
boolean gzipTempStorage,
int sortMaxReaders,
int sortMaxWriters,
Expand Down Expand Up @@ -62,7 +62,7 @@ public static PlanetilerConfig from(Arguments arguments) {
arguments.getBoolean("defer_mbtiles_index_creation", "skip adding index to mbtiles file", false),
arguments.getBoolean("optimize_db", "optimize mbtiles after writing", false),
arguments.getBoolean("emit_tiles_in_order", "emit tiles in index order", true),
arguments.getBoolean("force", "force overwriting output file", false),
arguments.getBoolean("force", "overwriting output file and ignore disk/RAM warnings", false),
arguments.getBoolean("gzip_temp", "gzip temporary feature storage (uses more CPU, but less disk space)", false),
arguments.getInteger("sort_max_readers", "maximum number of concurrent read threads to use when sorting chunks",
6),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package com.onthegomap.planetiler.reader.osm;

import com.onthegomap.planetiler.config.Bounds;
import com.onthegomap.planetiler.util.DiskBacked;
import com.onthegomap.planetiler.util.FileUtils;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
Expand All @@ -20,7 +22,7 @@
*
* @see <a href="https://wiki.openstreetmap.org/wiki/PBF_Format">OSM PBF Format</a>
*/
public class OsmInputFile implements Bounds.Provider, Supplier<OsmBlockSource> {
public class OsmInputFile implements Bounds.Provider, Supplier<OsmBlockSource>, DiskBacked {

private static final Logger LOGGER = LoggerFactory.getLogger(OsmInputFile.class);

Expand Down Expand Up @@ -123,6 +125,11 @@ public OsmBlockSource get() {
return lazy ? new LazyReader() : new EagerReader();
}

/** Returns the number of bytes this .osm.pbf file occupies on disk. */
@Override
public long diskUsageBytes() {
  return FileUtils.size(path);
}

private FileChannel openChannel() {
try {
return FileChannel.open(path, StandardOpenOption.READ);
Expand Down
Loading