diff --git a/configure b/configure
index 17e13a48d47e5a..e1a20381bf4fd0 100755
--- a/configure
+++ b/configure
@@ -392,6 +392,12 @@ parser.add_option('--with-etw',
     dest='with_etw',
     help='build with ETW (default is true on Windows)')
 
+parser.add_option('--use-largepages',
+    action='store_true',
+    dest='node_use_large_pages',
+    help='build with Large Pages support (Linux x64 static builds only). ' +
+         '(Needs Linux kernel >= 2.6.38 with Transparent Huge Pages enabled)')
+
 intl_optgroup.add_option('--with-intl',
     action='store',
     dest='with_intl',
@@ -936,6 +942,9 @@ def configure_node(o):
   else:
     o['variables']['node_use_dtrace'] = 'false'
 
+  use_large_pages = (flavor == 'linux' and options.node_use_large_pages)
+  o['variables']['node_use_large_pages'] = b(use_large_pages)
+
   if options.no_ifaddrs:
     o['defines'] += ['SUNOS_NO_IFADDRS']
 
diff --git a/ld.implicit.script b/ld.implicit.script
new file mode 100644
index 00000000000000..ad7ce1b2e2e5cf
--- /dev/null
+++ b/ld.implicit.script
@@ -0,0 +1,8 @@
+  SECTIONS {
+   .lpstub : { *(.lpstub) }
+  }
+  PROVIDE (__nodetext = .);
+  PROVIDE (_nodetext = .);
+  PROVIDE (nodetext = .);
+  INSERT BEFORE .text;
+
diff --git a/node.gyp b/node.gyp
index f1dc3997b76e47..9097db614b4f4c 100644
--- a/node.gyp
+++ b/node.gyp
@@ -594,6 +594,16 @@
             'src/tls_wrap.h'
           ],
         }],
+        [ 'node_use_large_pages=="true" and OS=="linux" \
+            and target_arch=="x64" and node_shared=="false"', {
+          # Large code pages are currently implemented only for Linux x64
+          # static builds; node.gypi links those with ld.implicit.script,
+          # which provides the __nodetext symbol the sources rely on.
+          'defines': [ 'NODE_ENABLE_LARGE_CODE_PAGES=1' ],
+          'sources': [
+            'src/node_large_page.cc'
+          ],
+        }],
       ],
     },
     {
diff --git a/node.gypi b/node.gypi
index fd7c70a12b15f4..8fe15eb7cd7b85 100644
--- a/node.gypi
+++ b/node.gypi
@@ -188,7 +188,14 @@
       },
     },
   'conditions': [
+    ['OS=="linux" and node_shared=="false" \
+        and target_arch=="x64" \
+        and node_use_large_pages=="true"', {
+      'ldflags': [
+        '-Wl,-T <(PRODUCT_DIR)/../../ld.implicit.script',
+      ],
+    }],
     ['OS!="aix" and node_shared=="false"', {
       'ldflags': [
         '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)'
             'uv<(STATIC_LIB_SUFFIX)',
diff --git a/src/node.cc b/src/node.cc
index 008e7ed599ed70..75e9ccc7924e6a 100644
--- a/src/node.cc
+++ b/src/node.cc
@@ -70,6 +70,7 @@
 #ifdef NODE_ENABLE_VTUNE_PROFILING
 #include "../deps/v8/src/third_party/vtune/v8-vtune.h"
 #endif
+#include "node_large_page.h"
 
 #include <errno.h>
 #include <fcntl.h>  // _O_RDWR
@@ -4377,6 +4378,14 @@ int Start(int argc, char** argv) {
 
   CHECK_GT(argc, 0);
 
+#ifdef NODE_ENABLE_LARGE_CODE_PAGES
+  if (node::IsLargePagesEnabled()) {
+    if (node::MapStaticCodeToLargePages() != 0) {
+      fprintf(stderr, "Reverting to default page size\n");
+    }
+  }
+#endif
+
   // Hack around with the argv pointer. Used for process.title = "blah".
   argv = uv_setup_args(argc, argv);
 
diff --git a/src/node_large_page.cc b/src/node_large_page.cc
new file mode 100644
index 00000000000000..4a910dc392f4f1
--- /dev/null
+++ b/src/node_large_page.cc
@@ -0,0 +1,264 @@
+// Copyright (C) 2018 Intel Corporation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+//
+// SPDX-License-Identifier: MIT
+
+#include "node_large_page.h"
+
+#include <errno.h>
+#include <fcntl.h>  // _O_RDWR
+#include <limits.h>  // PATH_MAX
+#include <locale.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+
+// The functions in this file map the text segment of node into 2M pages.
+// The algorithm is simple:
+// 1. Find the text region of the node binary in memory
+//    Examine /proc/self/maps to find the executable (r-xp) mapping that
+//    contains node's text and obtain its start and end
+//    Modify the start to point to the very beginning of node's text segment
+//    (from the variable __nodetext set up in ld.implicit.script)
+//    Align the addresses of start and end to Large Page boundaries
+//
+// 2. Move the text region to large pages
+//    Map a temporary area and copy the original code there
+//    Use mmap with the start address and MAP_FIXED so we get exactly the
+//    same virtual address
+//    Use madvise with MADV_HUGEPAGE to use anonymous 2M pages
+//    Copy the code back and unmap the temporary area.
+
+extern char __executable_start;
+extern char __etext;
+extern char __nodetext;
+
+namespace node {
+
+// Size of the large pages the text segment is moved to.
+static constexpr size_t kHugePageSize = 2 * 1024 * 1024;
+
+// Describes the large-page-aligned part of node's text segment.
+struct text_region {
+  char* from = nullptr;            // Start, aligned up to a 2M boundary
+  char* to = nullptr;              // End (exclusive), aligned down to 2M
+  int total_hugepages = 0;         // Number of 2M pages in [from, to)
+  bool found_text_region = false;  // True when the fields above are set
+};
+
+// Reports a failed system call on stderr.
+static void PrintSystemError(int error) {
+  fprintf(stderr, "Hugepages WARNING: %s\n", strerror(error));
+}
+
+// Rounds addr up to the next multiple of the huge page size.
+inline int64_t hugepage_align_up(int64_t addr) {
+  const int64_t mask = static_cast<int64_t>(kHugePageSize) - 1;
+  return (addr + mask) & ~mask;
+}
+
+// Rounds addr down to the previous multiple of the huge page size.
+inline int64_t hugepage_align_down(int64_t addr) {
+  const int64_t mask = static_cast<int64_t>(kHugePageSize) - 1;
+  return addr & ~mask;
+}
+
+// Locates the part of node's text segment that can be moved to large pages.
+//
+// The format of the maps file is the following
+// address           perms offset  dev   inode       pathname
+// 00400000-00452000 r-xp 00000000 08:02 173521      /usr/bin/dbus-daemon
+//
+// Every line is examined because the executable mapping is not necessarily
+// the first one.  found_text_region stays false when /proc/self/maps cannot
+// be opened, when no r-xp mapping contains __nodetext, or when that mapping
+// does not span at least one aligned large page.
+static struct text_region FindNodeTextRegion() {
+  struct text_region nregion;
+
+  std::ifstream ifs("/proc/self/maps");
+  if (!ifs) {
+    fprintf(stderr, "Could not open /proc/self/maps\n");
+    return nregion;
+  }
+
+  const uintptr_t nodetext = reinterpret_cast<uintptr_t>(&__nodetext);
+  std::string map_line;
+  while (std::getline(ifs, map_line)) {
+    std::istringstream iss(map_line);
+    uintptr_t start = 0;
+    uintptr_t end = 0;
+    char dash = 0;
+    std::string permission;
+
+    iss >> std::hex >> start >> dash >> end >> permission;
+    if (!iss || dash != '-' || permission != "r-xp")
+      continue;
+    if (nodetext < start || nodetext >= end)
+      continue;
+
+    char* from = reinterpret_cast<char*>(
+        hugepage_align_up(static_cast<int64_t>(nodetext)));
+    char* to = reinterpret_cast<char*>(
+        hugepage_align_down(static_cast<int64_t>(end)));
+    if (from < to) {
+      nregion.found_text_region = true;
+      nregion.from = from;
+      nregion.to = to;
+      nregion.total_hugepages = (to - from) / kHugePageSize;
+    }
+    break;
+  }
+
+  return nregion;
+}
+
+// Tells whether transparent huge pages can be requested by this process.
+//
+// /sys/kernel/mm/transparent_hugepage/enabled lists the available modes with
+// the active one in brackets; "[always]" and "[madvise]" both qualify.
+static bool IsTransparentHugePagesEnabled() {
+  std::ifstream ifs("/sys/kernel/mm/transparent_hugepage/enabled");
+  if (!ifs) {
+    fprintf(stderr, "Could not open file: " \
+                    "/sys/kernel/mm/transparent_hugepage/enabled\n");
+    return false;
+  }
+
+  std::string mode;
+  while (ifs >> mode) {
+    if (mode == "[always]" || mode == "[madvise]")
+      return true;
+  }
+
+  return false;
+}
+
+// Moving the text region to large pages. We need to be very careful.
+// a) This function itself should not be moved.
+//    We use a gcc option to put it outside the ".text" section
+// b) While the text region is being replaced, this function should not
+//    call any function(s) that might be moved.
+// 1. map a temporary area and copy the original code there
+// 2. mmap using the start address with MAP_FIXED so we get exactly
+//    the same virtual address
+// 3. madvise with MADV_HUGEPAGE
+// 4. copy the code back, restore its r-x protection and unmap the
+//    temporary area
+//
+// Returns 0 on success and -1 on failure.  Once step 2 has succeeded the
+// code is always copied back, even when a later call fails, so that the
+// process keeps running from a valid text region.
+int
+__attribute__((__section__(".lpstub")))
+__attribute__((__aligned__(2 * 1024 * 1024)))
+__attribute__((__noinline__))
+__attribute__((__optimize__("2")))
+MoveTextRegionToLargePages(struct text_region r) {
+  const size_t size = r.to - r.from;
+  void* const start = r.from;
+
+  // Allocate temporary region preparing for copy
+  void* nmem = mmap(nullptr, size, PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (nmem == MAP_FAILED) {
+    PrintSystemError(errno);
+    return -1;
+  }
+
+  memcpy(nmem, r.from, size);
+
+  // We already know the original page is r-xp
+  // (PROT_READ, PROT_EXEC, MAP_PRIVATE)
+  // We want PROT_WRITE because we are writing into it.
+  // We want it at the fixed address and we use MAP_FIXED.
+  void* tmem = mmap(start, size,
+                    PROT_READ | PROT_WRITE | PROT_EXEC,
+                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+  if (tmem == MAP_FAILED) {
+    PrintSystemError(errno);
+    munmap(nmem, size);
+    return -1;
+  }
+
+  // The text region is empty from here until the memcpy() below.  Nothing
+  // that may live in it (e.g. PrintSystemError) may be called in between,
+  // so the outcome of madvise() is only recorded here and reported later.
+  const int advise_ret = madvise(tmem, size, MADV_HUGEPAGE);
+  const int advise_errno = errno;
+
+  memcpy(start, nmem, size);
+
+  int ret = 0;
+  if (advise_ret == -1) {
+    PrintSystemError(advise_errno);
+    ret = -1;
+  }
+
+  // A failure leaves the code writable but still executable, so it is
+  // reported without tearing down the mapping we are running from.
+  if (mprotect(start, size, PROT_READ | PROT_EXEC) == -1) {
+    PrintSystemError(errno);
+    ret = -1;
+  }
+
+  // Release the temporary mapped region
+  if (munmap(nmem, size) == -1) {
+    PrintSystemError(errno);
+    ret = -1;
+  }
+
+  return ret;
+}
+
+// This is the primary API called from main
+int MapStaticCodeToLargePages() {
+  struct text_region r = FindNodeTextRegion();
+  if (!r.found_text_region) {
+    fprintf(stderr, "Hugepages WARNING: failed to find text region\n");
+    return -1;
+  }
+
+  // MoveTextRegionToLargePages() must stay mapped while it replaces the
+  // region, so only proceed when the region starts after it.
+  if (r.from <= reinterpret_cast<void*>(&MoveTextRegionToLargePages)) {
+    fprintf(stderr,
+            "Hugepages WARNING: text region does not follow the remap code\n");
+    return -1;
+  }
+
+  return MoveTextRegionToLargePages(r);
+}
+
+// Public wrapper around IsTransparentHugePagesEnabled().
+bool IsLargePagesEnabled() {
+  return IsTransparentHugePagesEnabled();
+}
+
+}  // namespace node
diff --git a/src/node_large_page.h b/src/node_large_page.h
new file mode 100644
index 00000000000000..e607bbaa50a085
--- /dev/null
+++ b/src/node_large_page.h
@@ -0,0 +1,41 @@
+// Copyright (C) 2018 Intel Corporation
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom
+// the Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+// OR OTHER DEALINGS IN THE SOFTWARE.
+//
+// SPDX-License-Identifier: MIT
+
+#ifndef SRC_NODE_LARGE_PAGE_H_
+#define SRC_NODE_LARGE_PAGE_H_
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+namespace node {
+
+// Returns true when the kernel reports transparent huge pages as enabled
+// ("always" or "madvise").
+bool IsLargePagesEnabled();
+
+// Moves the large-page-aligned part of node's text segment to 2M pages.
+// Returns 0 on success and -1 on failure.
+int MapStaticCodeToLargePages();
+
+}  // namespace node
+
+#endif  // NODE_WANT_INTERNALS
+#endif  // SRC_NODE_LARGE_PAGE_H_