Skip to content

Commit

Permalink
fix: multi nat initialization causing dead lock in waku tests + seria…
Browse files Browse the repository at this point in the history
…lize test runs to avoid timing and port occupied issues (#2799)

* Prevent multiple nat module initialization that cause dead lock in nat refresh thread tear down during tests.
* NPROC to 1 to avoid parallel test runs can lead to timing and port allocation issues

Co-authored-by: gabrielmer <101006718+gabrielmer@users.noreply.github.com>
  • Loading branch information
NagyZoltanPeter and gabrielmer authored Jun 12, 2024
1 parent 25a8b98 commit 5989de8
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 24 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,15 @@ jobs:

- name: Run tests
run: |
if [ ${{ runner.os }} == "Linux" ]; then
sudo docker run --rm -d -e POSTGRES_PASSWORD=test123 -p 5432:5432 postgres:15.4-alpine3.18
fi
postgres_enabled=0
if [ ${{ runner.os }} == "Linux" ]; then
sudo docker run --rm -d -e POSTGRES_PASSWORD=test123 -p 5432:5432 postgres:15.4-alpine3.18
postgres_enabled=1
fi
export MAKEFLAGS="-j1"
export NIMFLAGS="--colors:off -d:chronicles_colors:none"
make RLN_V${{matrix.rln_version}}=true V=1 LOG_LEVEL=DEBUG QUICK_AND_DIRTY_COMPILER=1 POSTGRES=$postgres_enabled test testwakunode2
build-docker-image:
Expand Down
58 changes: 38 additions & 20 deletions waku/common/utils/nat.nim
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ import chronicles, eth/net/nat, stew/results, nativesockets
logScope:
topics = "nat"

## Due to the design of nim-eth/nat module we must ensure it is only initialized once.
## see: https://github.com/waku-org/nwaku/issues/2628
## Details: nim-eth/nat module starts a meaintenance thread for refreshing the NAT mappings, but everything in the module is global,
## there is no room to store multiple configurations.
## Exact meaning: redirectPorts cannot be called twice in a program lifetime.
## During waku tests we happen to start several node instances in parallel thus resulting in multiple NAT configurations and multiple threads.
## Those threads will dead lock each other in tear down.
var singletonNat: bool = false

proc setupNat*(
natConf, clientId: string, tcpPort, udpPort: Port
): Result[
Expand All @@ -26,26 +35,35 @@ proc setupNat*(
tuple[ip: Option[IpAddress], tcpPort: Option[Port], udpPort: Option[Port]]

if strategy != NatNone:
let extIp = getExternalIP(strategy)
if extIP.isSome():
endpoint.ip = some(extIp.get())
# RedirectPorts in considered a gcsafety violation
# because it obtains the address of a non-gcsafe proc?
var extPorts: Option[(Port, Port)]
try:
extPorts = (
{.gcsafe.}:
redirectPorts(tcpPort = tcpPort, udpPort = udpPort, description = clientId)
)
except CatchableError:
# TODO: nat.nim Error: can raise an unlisted exception: Exception. Isolate here for now.
error "unable to determine external ports"
extPorts = none((Port, Port))

if extPorts.isSome():
let (extTcpPort, extUdpPort) = extPorts.get()
endpoint.tcpPort = some(extTcpPort)
endpoint.udpPort = some(extUdpPort)
## Only initialize the NAT module once
## redirectPorts cannot be called twice in a program lifetime.
## We can do it as same happens if getExternalIP fails and returns None
if singletonNat:
warn "NAT already initialized, skipping as cannot be done multiple times"
else:
singletonNat = true
let extIp = getExternalIP(strategy)
if extIP.isSome():
endpoint.ip = some(extIp.get())
# RedirectPorts in considered a gcsafety violation
# because it obtains the address of a non-gcsafe proc?
var extPorts: Option[(Port, Port)]
try:
extPorts = (
{.gcsafe.}:
redirectPorts(
tcpPort = tcpPort, udpPort = udpPort, description = clientId
)
)
except CatchableError:
# TODO: nat.nim Error: can raise an unlisted exception: Exception. Isolate here for now.
error "unable to determine external ports"
extPorts = none((Port, Port))

if extPorts.isSome():
let (extTcpPort, extUdpPort) = extPorts.get()
endpoint.tcpPort = some(extTcpPort)
endpoint.udpPort = some(extUdpPort)
else: # NatNone
if not natConf.startsWith("extip:"):
return err("not a valid NAT mechanism: " & $natConf)
Expand Down

0 comments on commit 5989de8

Please sign in to comment.