Skip to content

Commit

Permalink
Memory-mapped file improvements (#103)
Browse files Browse the repository at this point in the history
* Use large memory-mapped file segments to avoid running out of space on smaller machines
* Add `--nodemap-madvise` argument to opt into madvise(random) for memory-mapped file access
  • Loading branch information
msbarry authored Mar 9, 2022
1 parent 1c27d83 commit 0a06479
Show file tree
Hide file tree
Showing 12 changed files with 293 additions and 49 deletions.
2 changes: 2 additions & 0 deletions NOTICE.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The `planetiler-core` module includes the following software:
- com.google.guava:guava (Apache license)
- org.openstreetmap.osmosis:osmosis-osm-binary (LGPL 3.0)
- com.carrotsearch:hppc (Apache license)
- com.github.jnr:jnr-ffi (Apache license)
- Adapted code:
- `DouglasPeuckerSimplifier` from [JTS](https://github.com/locationtech/jts) (EDL)
- `OsmMultipolygon` from [imposm3](https://github.com/omniscale/imposm3) (Apache license)
Expand All @@ -31,6 +32,7 @@ The `planetiler-core` module includes the following software:
- `Imposm3Parsers` from [imposm3](https://github.com/omniscale/imposm3) (Apache license)
- `PbfDecoder` from [osmosis](https://github.com/openstreetmap/osmosis) (Public Domain)
- `PbfFieldDecoder` from [osmosis](https://github.com/openstreetmap/osmosis) (Public Domain)
- `NativeUtil` from [uppend](https://github.com/upserve/uppend/) (MIT License)

Additionally, the `planetiler-basemap` module is based on [OpenMapTiles](https://github.com/openmaptiles/openmaptiles):

Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ Planetiler is made possible by these awesome open source projects:
and [tag parsing utilities](planetiler-core/src/main/java/com/onthegomap/planetiler/util/Imposm3Parsers.java)
- [HPPC](http://labs.carrotsearch.com/) for high-performance primitive Java collections
- [Osmosis](https://wiki.openstreetmap.org/wiki/Osmosis) for Java utilities to parse OpenStreetMap data
- [JNR-FFI](https://github.com/jnr/jnr-ffi) for access to low-level native system calls that improve memory-mapped
file performance.

See [NOTICE.md](NOTICE.md) for a full list and license details.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public static void main(String[] args) throws InterruptedException {
Format format = Format.defaultInstance();
Path path = Path.of("./llmaptest");
FileUtils.delete(path);
LongLongMap map = LongLongMap.from(args[0], args[1], path);
LongLongMap map = LongLongMap.from(args[0], args[1], path, args.length < 5 || Boolean.parseBoolean(args[4]));
long entries = Long.parseLong(args[2]);
int readers = Integer.parseInt(args[3]);

Expand All @@ -36,6 +36,7 @@ class LocalCounter {
LocalCounter counter = new LocalCounter();
ProgressLoggers loggers = ProgressLoggers.create()
.addRatePercentCounter("entries", entries, () -> counter.count, true)
.addFileSize(map)
.newLine()
.addProcessStats();
AtomicReference<String> writeRate = new AtomicReference<>();
Expand Down
5 changes: 5 additions & 0 deletions planetiler-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
<artifactId>osmosis-osm-binary</artifactId>
<version>0.48.3</version>
</dependency>
<dependency>
<groupId>com.github.jnr</groupId>
<artifactId>jnr-ffi</artifactId>
<version>2.2.11</version>
</dependency>
<dependency>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ public Planetiler addOsmSource(String name, Path defaultPath, String defaultUrl)
),
ifSourceUsed(name, () -> {
try (
var nodeLocations = LongLongMap.from(config.nodeMapType(), config.nodeMapStorage(), nodeDbPath);
var nodeLocations =
LongLongMap.from(config.nodeMapType(), config.nodeMapStorage(), nodeDbPath, config.nodeMapMadvise());
var osmReader = new OsmReader(name, thisInputFile, nodeLocations, profile(), stats)
) {
osmReader.pass1(config);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
package com.onthegomap.planetiler.collection;

import com.onthegomap.planetiler.util.FileUtils;
import com.onthegomap.planetiler.util.MmapUtil;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
Expand All @@ -29,15 +27,21 @@ abstract class AppendStoreMmap implements AppendStore {
final long segmentMask;
final long segmentBytes;
private final Path path;
private final boolean madvise;
long outIdx = 0;
private volatile MappedByteBuffer[] segments;
private volatile FileChannel channel;

AppendStoreMmap(Path path) {
this(path, 1 << 20); // 1MB
static {
MmapUtil.init();
}

AppendStoreMmap(Path path, long segmentSizeBytes) {
AppendStoreMmap(Path path, boolean madvise) {
this(path, 1 << 30, madvise); // 1GB
}

AppendStoreMmap(Path path, long segmentSizeBytes, boolean madvise) {
this.madvise = madvise;
segmentBits = (int) (Math.log(segmentSizeBytes) / Math.log(2));
segmentMask = (1L << segmentBits) - 1;
segmentBytes = segmentSizeBytes;
Expand All @@ -58,6 +62,7 @@ MappedByteBuffer[] getSegments() {
synchronized (this) {
if ((result = segments) == null) {
try {
boolean madviseFailed = false;
// prepare the memory mapped file: stop writing, start reading
outputStream.close();
channel = FileChannel.open(path, StandardOpenOption.READ);
Expand All @@ -66,7 +71,19 @@ MappedByteBuffer[] getSegments() {
int i = 0;
for (long segmentStart = 0; segmentStart < outIdx; segmentStart += segmentBytes) {
long segmentEnd = Math.min(segmentBytes, outIdx - segmentStart);
result[i++] = channel.map(FileChannel.MapMode.READ_ONLY, segmentStart, segmentEnd);
MappedByteBuffer thisBuffer = channel.map(FileChannel.MapMode.READ_ONLY, segmentStart, segmentEnd);
if (madvise) {
try {
MmapUtil.madvise(thisBuffer, MmapUtil.Madvice.RANDOM);
} catch (IOException e) {
if (!madviseFailed) { // log once
LOGGER.info(
"madvise not available on this system - node location lookup may be slower when less free RAM is available outside the JVM");
madviseFailed = true;
}
}
}
result[i++] = thisBuffer;
}
segments = result;
} catch (IOException e) {
Expand All @@ -87,27 +104,8 @@ public void close() throws IOException {
}
if (segments != null) {
try {
// attempt to force-unmap the file, so we can delete it later
// https://stackoverflow.com/questions/2972986/how-to-unmap-a-file-from-memory-mapped-using-filechannel-in-java
Class<?> unsafeClass;
try {
unsafeClass = Class.forName("sun.misc.Unsafe");
} catch (Exception ex) {
unsafeClass = Class.forName("jdk.internal.misc.Unsafe");
}
Method clean = unsafeClass.getMethod("invokeCleaner", ByteBuffer.class);
clean.setAccessible(true);
Field theUnsafeField = unsafeClass.getDeclaredField("theUnsafe");
theUnsafeField.setAccessible(true);
Object theUnsafe = theUnsafeField.get(null);
for (int i = 0; i < segments.length; i++) {
var buffer = segments[i];
if (buffer != null) {
clean.invoke(theUnsafe, buffer);
segments[i] = null;
}
}
} catch (Exception e) {
MmapUtil.unmap(segments);
} catch (IOException e) {
LOGGER.info("Unable to unmap " + path + " " + e);
}
Arrays.fill(segments, null);
Expand All @@ -122,12 +120,12 @@ public long diskUsageBytes() {

static class Ints extends AppendStoreMmap implements AppendStore.Ints {

Ints(Path path) {
super(path);
Ints(Path path, boolean madvise) {
super(path, madvise);
}

Ints(Path path, long segmentSizeBytes) {
super(path, segmentSizeBytes);
Ints(Path path, long segmentSizeBytes, boolean madvise) {
super(path, segmentSizeBytes, madvise);
}

@Override
Expand Down Expand Up @@ -158,12 +156,12 @@ public long size() {

static class Longs extends AppendStoreMmap implements AppendStore.Longs {

Longs(Path path) {
super(path);
Longs(Path path, boolean madvise) {
super(path, madvise);
}

Longs(Path path, long segmentSizeBytes) {
super(path, segmentSizeBytes);
Longs(Path path, long segmentSizeBytes, boolean madvise) {
super(path, segmentSizeBytes, madvise);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,17 @@ public interface LongLongMap extends Closeable, MemoryEstimator.HasEstimate, Dis
* @param name which implementation to use: {@code "noop"}, {@code "sortedtable"} or {@code "sparsearray"}
* @param storage how to store data: {@code "ram"} or {@code "mmap"}
* @param path where to store data (if mmap)
* @param madvise whether to use linux madvise random to improve read performance
* @return A longlong map instance
* @throws IllegalArgumentException if {@code name} or {@code storage} is not valid
*/
static LongLongMap from(String name, String storage, Path path) {
static LongLongMap from(String name, String storage, Path path, boolean madvise) {
boolean ram = isRam(storage);

return switch (name) {
case "noop" -> noop();
case "sortedtable" -> ram ? newInMemorySortedTable() : newDiskBackedSortedTable(path);
case "sparsearray" -> ram ? newInMemorySparseArray() : newDiskBackedSparseArray(path);
case "sortedtable" -> ram ? newInMemorySortedTable() : newDiskBackedSortedTable(path, madvise);
case "sparsearray" -> ram ? newInMemorySparseArray() : newDiskBackedSparseArray(path, madvise);
default -> throw new IllegalArgumentException("Unexpected value: " + name);
};
}
Expand Down Expand Up @@ -125,11 +126,11 @@ static LongLongMap newInMemorySortedTable() {
}

/** Returns a memory-mapped longlong map that uses 12-bytes per node and binary search to find values. */
static LongLongMap newDiskBackedSortedTable(Path dir) {
static LongLongMap newDiskBackedSortedTable(Path dir, boolean madvise) {
FileUtils.createDirectory(dir);
return new SortedTable(
new AppendStore.SmallLongs(i -> new AppendStoreMmap.Ints(dir.resolve("keys-" + i))),
new AppendStoreMmap.Longs(dir.resolve("values"))
new AppendStore.SmallLongs(i -> new AppendStoreMmap.Ints(dir.resolve("keys-" + i), madvise)),
new AppendStoreMmap.Longs(dir.resolve("values"), madvise)
);
}

Expand All @@ -145,8 +146,8 @@ static LongLongMap newInMemorySparseArray() {
* Returns a memory-mapped longlong map that uses 8-bytes per node and O(1) lookup but wastes space storing lots of
* 0's when the key space is fragmented.
*/
static LongLongMap newDiskBackedSparseArray(Path path) {
return new SparseArray(new AppendStoreMmap.Longs(path));
static LongLongMap newDiskBackedSparseArray(Path path, boolean madvise) {
return new SparseArray(new AppendStoreMmap.Longs(path, madvise));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public record PlanetilerConfig(
int sortMaxWriters,
String nodeMapType,
String nodeMapStorage,
boolean nodeMapMadvise,
String httpUserAgent,
Duration httpTimeout,
int httpRetries,
Expand Down Expand Up @@ -75,6 +76,8 @@ public static PlanetilerConfig from(Arguments arguments) {
arguments
.getString("nodemap_type", "type of node location map: noop, sortedtable, or sparsearray", "sortedtable"),
arguments.getString("nodemap_storage", "storage for location map: mmap or ram", "mmap"),
arguments.getBoolean("nodemap_madvise", "use linux madvise(random) to improve memory-mapped read performance",
false),
arguments.getString("http_user_agent", "User-Agent header to set when downloading files over HTTP",
"Planetiler downloader (https://github.com/onthegomap/planetiler)"),
arguments.getDuration("http_timeout", "Timeout to use when downloading files over HTTP", "30s"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
MIT License
Copyright (c) 2017 Upserve, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
package com.onthegomap.planetiler.util;

import com.kenai.jffi.MemoryIO;
import java.io.IOException;
import java.nio.ByteBuffer;
import jnr.ffi.LibraryLoader;
import jnr.ffi.types.size_t;

/**
 * Wrapper for the native {@code madvise} function, meant to be used via the public API
 * {@link MmapUtil#madvise(ByteBuffer, MmapUtil.Madvice)}.
 * <p>
 * Ported from <a href=
 * "https://github.com/upserve/uppend/blob/70967c6f24d7f1a3bbc18799f485d981da93f53b/src/main/java/com/upserve/uppend/blobs/NativeIO.java">upserve/uppend/NativeIO</a>.
 *
 * @see <a href="https://man7.org/linux/man-pages/man2/madvise.2.html">madvise(2) — Linux manual page</a>
 */
class Madvise {

  /** Binding to the C library functions declared in {@link NativeC}. */
  private static final NativeC nativeC = LibraryLoader.create(NativeC.class).load("c");
  /** Page size reported by the C library, or {@code -1} when {@code getpagesize} could not be linked. */
  static int pageSize;

  static {
    try {
      pageSize = nativeC.getpagesize(); // 4096 on most Linux
    } catch (UnsatisfiedLinkError e) {
      // remember the failure so madvise() can report it as an IOException instead of failing class init
      pageSize = -1;
    }
  }

  /** Static utility holder, not meant to be instantiated. */
  private Madvise() {}

  /** Rounds {@code address} down to the start of the page that contains it. */
  private static long alignedAddress(long address) {
    // -pageSize is a mask that clears the low bits; relies on pageSize being a power of two
    return address & (-pageSize);
  }

  /**
   * Returns the number of bytes from the start of the page containing {@code address} to the end of the page
   * containing the last byte of a region of {@code capacity} bytes that starts at {@code address}.
   */
  private static long alignedSize(long address, int capacity) {
    long end = address + capacity;
    // round the end up to the next page boundary
    end = (end + pageSize - 1) & (-pageSize);
    return end - alignedAddress(address);
  }

  /**
   * Give a hint to the system how a mapped memory segment will be used so the OS can optimize performance.
   *
   * @param buffer The mapped memory segment.
   * @param value  The advice to use.
   * @throws IOException If an error occurs, {@code buffer} is not a direct buffer, or madvise is not available on
   *                     this system
   * @see <a href="https://man7.org/linux/man-pages/man2/madvise.2.html">madvise(2) — Linux manual page</a>
   */
  static void madvise(ByteBuffer buffer, int value) throws IOException {
    if (pageSize <= 0) {
      throw new IOException("madvise failed, pagesize not available");
    }
    if (!buffer.isDirect()) {
      // heap buffers have no stable native address to hand to the kernel
      throw new IOException("madvise failed, buffer is not a direct buffer");
    }
    try {
      final long address = MemoryIO.getInstance().getDirectBufferAddress(buffer);
      final int capacity = buffer.capacity();

      // madvise requires a page-aligned start address. The length is measured from the ORIGINAL address so that
      // the rounded range still reaches the last byte of the buffer when the buffer does not start on a page
      // boundary.
      long alignedAddress = alignedAddress(address);
      long alignedSize = alignedSize(address, capacity);

      int val = nativeC.madvise(alignedAddress, alignedSize, value);
      if (val != 0) {
        throw new IOException(String.format("System call madvise failed with code: %d", val));
      }
    } catch (UnsatisfiedLinkError error) {
      throw new IOException("madvise failed", error);
    }
  }

  /** JNR-FFI will automatically compile these to wrappers around native functions with the same signatures. */
  public interface NativeC {

    /** Native {@code madvise(2)}: advises the kernel about use of {@code size} bytes starting at {@code address}. */
    int madvise(@size_t long address, @size_t long size, int advice);

    /** Native {@code getpagesize}: returns the system page size in bytes. */
    int getpagesize();
  }
}
Loading

0 comments on commit 0a06479

Please sign in to comment.