Skip to content

Commit

Permalink
cli: add top-level retry loop.
Browse files Browse the repository at this point in the history
We've been hitting some intermittent crashes of the following
form recently:

    Traceback (most recent call last):
      File "/usr/local/bin/packet-networking", line 11, in <module>
        sys.exit(cli())
      File "/usr/local/lib/python3.5/dist-packages/click/core.py", line 722, in __call__
        return self.main(*args, **kwargs)
      File "/usr/local/lib/python3.5/dist-packages/click/core.py", line 697, in main
        rv = self.invoke(ctx)
      File "/usr/local/lib/python3.5/dist-packages/click/core.py", line 895, in invoke
        return ctx.invoke(self.callback, **ctx.params)
      File "/usr/local/lib/python3.5/dist-packages/click/core.py", line 535, in invoke
        return callback(*args, **kwargs)
      File "/usr/local/lib/python3.5/dist-packages/packetnetworking/cli.py", line 107, in cli
        tasks = builder.run(rootfs)
      File "/usr/local/lib/python3.5/dist-packages/packetnetworking/builder.py", line 67, in run
        return builder.run(rootfs_path)
      File "/usr/local/lib/python3.5/dist-packages/packetnetworking/distros/distro_builder.py", line 163, in run
        rendered_tasks = self.render()
      File "/usr/local/lib/python3.5/dist-packages/packetnetworking/distros/distro_builder.py", line 156, in render
        rendered_tasks[path] = template.render(self.context())
      File "/usr/local/lib/python3.5/dist-packages/jinja2/environment.py", line 1008, in render
        return self.environment.handle_exception(exc_info, True)
      File "/usr/local/lib/python3.5/dist-packages/jinja2/environment.py", line 780, in handle_exception
        reraise(exc_type, exc_value, tb)
      File "/usr/local/lib/python3.5/dist-packages/jinja2/_compat.py", line 37, in reraise
        raise value.with_traceback(tb)
      File "<template>", line 13, in top-level template code
    jinja2.exceptions.UndefinedError: 'None' has no attribute 'address'

Current speculation is that this is due to some sort of hegel race;
until the root cause is determined and fixed, we're hoping this retry
loop will keep things running.
  • Loading branch information
zevweiss committed May 20, 2020
1 parent 6356d40 commit 327997e
Showing 1 changed file with 32 additions and 0 deletions.
32 changes: 32 additions & 0 deletions packetnetworking/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
import time
import json
import click
import logging
Expand Down Expand Up @@ -68,6 +69,37 @@ def cli(
)
)

# arbitrary retry count
max_attempts = 10
attempt = 1
while True:
try:
try_run(
metadata_file,
metadata_url,
operating_system,
rootfs,
resolvers,
verbose,
quiet,
)
break
except Exception as exc:
if attempt == max_attempts:
raise
attempt += 1
delay = 2 ** min(attempt, 7)
log.error(
"Caught unexpected exception ('{}'), retrying in {} seconds...".format(
exc, delay
)
)
time.sleep(delay)


def try_run(
metadata_file, metadata_url, operating_system, rootfs, resolvers, verbose, quiet
):
builder = packetnetworking.Builder()
if metadata_file:
builder.set_metadata(json.load(metadata_file))
Expand Down

0 comments on commit 327997e

Please sign in to comment.