Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HashedWheelTimer startup crash on .NET 6+ #7174

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions src/core/Akka.Tests/Actor/TimerStartupCrashBugFixSpec.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// -----------------------------------------------------------------------
// <copyright file="TimerStartupCrashBugFixSpec.cs" company="Akka.NET Project">
// Copyright (C) 2009-2024 Lightbend Inc. <http://www.lightbend.com>
// Copyright (C) 2013-2024 .NET Foundation <https://github.com/akkadotnet/akka.net>
// </copyright>
// -----------------------------------------------------------------------

#nullable enable
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Enabled nullable for just this test - on my quest to ensure that we start doing that generally everywhere.

using System;
using System.Linq;
using System.Threading.Tasks;
using Akka.Actor;
using Akka.Event;
using Akka.Routing;
using Akka.TestKit;
using FluentAssertions;
using FsCheck;
using Xunit;
using Xunit.Abstractions;

namespace Akka.Tests.Actor;

public class TimerStartupCrashBugFixSpec : AkkaSpec
{
public TimerStartupCrashBugFixSpec(ITestOutputHelper output) : base(output: output, Akka.Configuration.Config.Empty)
{
Sys.Log.Info("Starting TimerStartupCrashBugFixSpec");
}

private class TimerActor : UntypedActor, IWithTimers
{
public sealed class Check
{
public static Check Instance { get; } = new Check();

private Check()
{
}
}

public sealed class Hit
{
public static Hit Instance { get; } = new Hit();

private Hit()
{
}
}

private readonly ILoggingAdapter _log = Context.GetLogger();
private int _counter = 0;
public ITimerScheduler? Timers { get; set; } = null;

protected override void PreStart()
{
Timers?.StartPeriodicTimer("key", Hit.Instance, TimeSpan.FromMilliseconds(1));
}

protected override void OnReceive(object message)
{
switch (message)
{
case Check _:
_log.Info("Check received");
Sender.Tell(_counter);
break;
case Hit _:
_log.Info("Hit received");
_counter++;
break;
}
}

protected override void PreRestart(Exception reason, object message)
{
_log.Error(reason, "Not restarting - shutting down");
Context.Stop(Self);
}
}

[Fact]
public async Task TimerActor_should_not_crash_on_startup()
{
var actors = Enumerable.Range(0, 10).Select(c => Sys.ActorOf(Props.Create(() => new TimerActor()))).ToList();
var watchTasks = actors.Select(actor => actor.WatchAsync()).ToList();

var i = 0;
while (i == 0)
{
// guarantee that the actor has started and processed a message from scheduler
i = await actors[0].Ask<int>(TimerActor.Check.Instance);
}


watchTasks.Any(c => c.IsCompleted).Should().BeFalse();
}
}
12 changes: 7 additions & 5 deletions src/core/Akka/Actor/Scheduler/HashedWheelTimerScheduler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -147,25 +147,27 @@ private static int NormalizeTicksPerWheel(int ticksPerWheel)

private void Start()
{
if (_workerState == WORKER_STATE_STARTED)
// only read the worker state once so it can't be a moving target for else-branch
var workerStateRead = _workerState;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the key fix - cache the value once so we're not doing a volatile read on each branch of the if..else - that's what's lead to the problems.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

if (workerStateRead == WORKER_STATE_STARTED)
{
// do nothing
}
else if (_workerState == WORKER_STATE_INIT)
else if (workerStateRead == WORKER_STATE_INIT)
{
if (Interlocked.CompareExchange(ref _workerState, WORKER_STATE_STARTED, WORKER_STATE_INIT) == WORKER_STATE_INIT)
{
_timer ??= new PeriodicTimer(_timerDuration);
Task.Run(() => RunAsync(_cts.Token)); // start the clock
Task.Run(() => RunAsync(_cts.Token).ConfigureAwait(false)); // start the clock
}
}
else if (_workerState == WORKER_STATE_SHUTDOWN)
else if (workerStateRead == WORKER_STATE_SHUTDOWN)
{
throw new SchedulerException("cannot enqueue after timer shutdown");
}
else
{
throw new InvalidOperationException($"Worker in invalid state: {_workerState}");
throw new InvalidOperationException($"Worker in invalid state: {workerStateRead}");
}

if(_startTime == 0)
Expand Down
Loading