From b42da6daa0d715afb4259ac7c0a1d6a71adca89d Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Tue, 14 Jan 2025 17:20:18 -0300 Subject: [PATCH] fix: Update fs max user instances for k8s (#11220) We're still seeing `create fsnotify watcher: too many open files` on some kind tests. I booted up the `052a1e16394277fdf` AMI we're using for the tester in a micro instance on AWS and found the following defaults: ``` # sysctl fs.file-max = 9223372036854775807 fs.inotify.max_user_instances = 128 fs.inotify.max_user_watches = 8192 # ulimit open files (-n) 1024 ``` So we're now trying to bump not just user watches but also instances, as well as ulimit max open files. Full output from a fresh system here: ``` sudo sysctl -a | grep fs fs.aio-max-nr = 65536 fs.aio-nr = 0 fs.binfmt_misc.python3/10 = enabled fs.binfmt_misc.python3/10 = interpreter /usr/bin/python3.10 fs.binfmt_misc.python3/10 = flags: fs.binfmt_misc.python3/10 = offset 0 fs.binfmt_misc.python3/10 = magic 6f0d0d0a fs.binfmt_misc.status = enabled fs.dentry-state = 31608 12957 45 0 2892 0 fs.dir-notify-enable = 1 fs.epoll.max_user_watches = 212992 fs.fanotify.max_queued_events = 16384 fs.fanotify.max_user_groups = 128 fs.fanotify.max_user_marks = 8192 fs.file-max = 9223372036854775807 fs.file-nr = 1152 0 9223372036854775807 fs.inode-nr = 29688 705 fs.inode-state = 29688 705 0 0 0 0 0 fs.inotify.max_queued_events = 16384 fs.inotify.max_user_instances = 128 fs.inotify.max_user_watches = 8192 fs.lease-break-time = 45 fs.leases-enable = 1 fs.mount-max = 100000 fs.mqueue.msg_default = 10 fs.mqueue.msg_max = 10 fs.mqueue.msgsize_default = 8192 fs.mqueue.msgsize_max = 8192 fs.mqueue.queues_max = 256 fs.nr_open = 1048576 fs.overflowgid = 65534 fs.overflowuid = 65534 fs.pipe-max-size = 1048576 fs.pipe-user-pages-hard = 0 fs.pipe-user-pages-soft = 16384 fs.protected_fifos = 1 fs.protected_hardlinks = 1 fs.protected_regular = 2 fs.protected_symlinks = 1 fs.quota.allocated_dquots = 0 fs.quota.cache_hits = 0 fs.quota.drops = 0 fs.quota.free_dquots = 0 fs.quota.lookups = 0 fs.quota.reads = 0 fs.quota.syncs = 4 fs.quota.writes = 0 fs.suid_dumpable = 2 fs.verity.require_signatures = 0 ulimit -a real-time non-blocking time (microseconds, -R) unlimited core file size (blocks, -c) 0 data seg size (kbytes, -d) unlimited scheduling priority (-e) 0 file size (blocks, -f) unlimited pending signals (-i) 3737 max locked memory (kbytes, -l) 121500 max memory size (kbytes, -m) unlimited open files (-n) 1024 pipe size (512 bytes, -p) 8 POSIX message queues (bytes, -q) 819200 real-time priority (-r) 0 stack size (kbytes, -s) 8192 cpu time (seconds, -t) unlimited max user processes (-u) 3737 virtual memory (kbytes, -v) unlimited file locks (-x) unlimited ``` --- .github/ensure-tester/action.yml | 1 + .github/ensure-tester/run | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/ensure-tester/action.yml b/.github/ensure-tester/action.yml index d96e117e3c0..47dc913627e 100644 --- a/.github/ensure-tester/action.yml +++ b/.github/ensure-tester/action.yml @@ -18,6 +18,7 @@ inputs: default: BestEffort runs: # define an action, runs in OS of caller + # ami-052a1e16394277fdf is an ubuntu 22.04.4 jammy using: composite steps: - name: Select Instance Type and AMI diff --git a/.github/ensure-tester/run b/.github/ensure-tester/run index bb40831c515..947e799872d 100755 --- a/.github/ensure-tester/run +++ b/.github/ensure-tester/run @@ -5,7 +5,6 @@ ttl=$1 scripts/run_on_tester " set -eu; sudo shutdown -P $ttl; - sudo sysctl fs.inotify.max_user_watches=65536 function clone { if ! [ -d ~/run-$RUN_ID ]; then mkdir -p ~/run-$RUN_ID; @@ -18,6 +17,12 @@ scripts/run_on_tester " } export RUN_ID GIT_COMMIT export -f clone + + # update fs limits for k8s + sudo sysctl -w fs.inotify.max_user_watches=65536 + sudo sysctl -w fs.inotify.max_user_instances=65536 + ulimit -n 32768 + flock /var/lock/clone.lock bash -c clone cd ~/run-$RUN_ID # reuse script from ensure-builder, but don't set up chron