Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SetErrorStatusOnException option to TracerProviderSdk #1858

Merged
merged 24 commits into from
Mar 6, 2021
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions OpenTelemetry.sln
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "OpenTelemetry.Shared", "src
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestApp.AspNetCore.5.0", "test\TestApp.AspNetCore.5.0\TestApp.AspNetCore.5.0.csproj", "{972396A8-E35B-499C-9BA1-765E9B8822E1}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "exception-handling", "docs\trace\exception-handling\exception-handling.csproj", "{08D29501-F0A3-468F-B18D-BD1821A72383}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -387,6 +389,10 @@ Global
{972396A8-E35B-499C-9BA1-765E9B8822E1}.Debug|Any CPU.Build.0 = Debug|Any CPU
{972396A8-E35B-499C-9BA1-765E9B8822E1}.Release|Any CPU.ActiveCfg = Release|Any CPU
{972396A8-E35B-499C-9BA1-765E9B8822E1}.Release|Any CPU.Build.0 = Release|Any CPU
{08D29501-F0A3-468F-B18D-BD1821A72383}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{08D29501-F0A3-468F-B18D-BD1821A72383}.Debug|Any CPU.Build.0 = Debug|Any CPU
{08D29501-F0A3-468F-B18D-BD1821A72383}.Release|Any CPU.ActiveCfg = Release|Any CPU
{08D29501-F0A3-468F-B18D-BD1821A72383}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -416,6 +422,7 @@ Global
{B3F03725-23A0-4582-9526-F6A7E38F35CC} = {3862190B-E2C5-418E-AFDC-DB281FB5C705}
{13C10C9A-07E8-43EB-91F5-C2B116FBE0FC} = {3862190B-E2C5-418E-AFDC-DB281FB5C705}
{972396A8-E35B-499C-9BA1-765E9B8822E1} = {77C7929A-2EED-4AA6-8705-B5C443C8AA0F}
{08D29501-F0A3-468F-B18D-BD1821A72383} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {55639B5C-0770-4A22-AB56-859604650521}
Expand Down
2 changes: 1 addition & 1 deletion docs/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<!-- https://dotnet.microsoft.com/download/dotnet-core -->
<TargetFrameworks>netcoreapp2.1;netcoreapp3.1</TargetFrameworks>
<TargetFrameworks>netcoreapp2.1;netcoreapp3.1;net5.0</TargetFrameworks>
<!-- https://dotnet.microsoft.com/download/dotnet-framework -->
<TargetFrameworks Condition="$(OS) == 'Windows_NT'">$(TargetFrameworks);net461;net462;net47;net471;net472;net48</TargetFrameworks>
</PropertyGroup>
Expand Down
76 changes: 76 additions & 0 deletions docs/trace/exception-handling/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// <copyright file="Program.cs" company="OpenTelemetry Authors">
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// </copyright>

using System;
using System.Diagnostics;
using OpenTelemetry;
using OpenTelemetry.Trace;

public class Program
{
private static readonly ActivitySource MyActivitySource = new ActivitySource(
"MyCompany.MyProduct.MyLibrary");

public static void Main()
{
AppDomain.CurrentDomain.UnhandledException += UnhandledExceptionHandler;

using var tracerProvider = Sdk.CreateTracerProviderBuilder(options =>
{
options.SetErrorStatusOnException = true;
})
.AddSource("MyCompany.MyProduct.MyLibrary")
.SetSampler(new AlwaysOnSampler())
.AddConsoleExporter()
.Build();

try
{
Func();
}
catch (Exception)
{
// swallow the exception
}

Func();
reyang marked this conversation as resolved.
Show resolved Hide resolved
}

private static void Func()
{
using (MyActivitySource.StartActivity("Foo"))
{
using (MyActivitySource.StartActivity("Bar"))
{
throw new Exception("Oops!");
}
}
}

private static void UnhandledExceptionHandler(object source, UnhandledExceptionEventArgs args)
reyang marked this conversation as resolved.
Show resolved Hide resolved
{
var ex = (Exception)args.ExceptionObject;

var activity = Activity.Current;

while (activity != null)
{
activity.RecordException(ex);
activity.Dispose();
reyang marked this conversation as resolved.
Show resolved Hide resolved
activity = activity.Parent;
}
}
}
67 changes: 67 additions & 0 deletions docs/trace/exception-handling/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Exception Handling
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️ this guide 😄


## User-handled Exception

The term `User-handled Exception` is used to describe exceptions that are
handled by the application.

While using `Activity` API, the common pattern would be:

```csharp
using (var activity = MyActivitySource.StartActivity("Foo"))
{
try
{
Func();
}
catch (SomeException ex)
{
activity?.SetStatus(Status.Error);
DoSomething();
}
catch (Exception)
{
activity?.SetStatus(Status.Error);
throw;
}
}
```

The above approach could become hard to manage if there are deeply nested
`Activity` objects, or there are activities created in a 3rd party library.

The following configuration will automatically detect exception and set the
activity status to `Error`:

```csharp
Sdk.CreateTracerProviderBuilder(options => {
options.SetErrorStatusOnException = true;
});
```

A complete example can be found [here](./Program.cs).

Note: this feature is platform dependent as it relies on
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the doc it looks like its support in all cases except in .NET Core 2.1.
Do you know if it works in windows only?
While we can generally redirect users to official docs, we can list any obvious things right here in this doc . for example, if this only works in Windows, we can mention it here (along with link to official doc)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It works on Mac (x64) and Linux (x86 VM) as I've tested locally (I also created issue #1859 to cover ARM64).

[System.Runtime.InteropServices.Marshal.GetExceptionPointers](https://docs.microsoft.com/dotnet/api/system.runtime.interopservices.marshal.getexceptionpointers).

## Unhandled Exception

The term `Unhandled Exception` is used to describe exceptions that are not
handled by the application. When an unhandled exception happened, the behavior
will depend on the presence of a debugger:

* If there is no debugger, the exception will normally crash the process or
terminate the thread.
* If a debugger is attached, the debugger will be notified that an unhandled
exception happened.
* In case a postmortem debugger is configured, the postmortem debugger will be
activited and normally it will collect a crash dump.

It might be useful to automatically capture the unhandled exceptions, travel
through the unfinished activities and export them for troubleshooting. One
possible way of doing this by using
[AppDomain.UnhandledException](https://docs.microsoft.com/dotnet/api/system.appdomain.unhandledexception)
can be found [here](./Program.cs).

**WARNING:** Use `AppDomain.UnhandledException` with caution. A throw in the
handler puts the process into an unrecoverable state.
8 changes: 8 additions & 0 deletions docs/trace/exception-handling/exception-handling.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<ItemGroup>
<!---
<PackageReference Include="OpenTelemetry.Exporter.Console" Version="$(OpenTelemetryExporterConsolePkgVer)" />
-->
<ProjectReference Include="$(RepoRoot)\src\OpenTelemetry.Exporter.Console\OpenTelemetry.Exporter.Console.csproj" />
</ItemGroup>
</Project>
5 changes: 5 additions & 0 deletions src/OpenTelemetry/.publicApi/net452/PublicAPI.Unshipped.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
OpenTelemetry.Trace.ParentBasedSampler.ParentBasedSampler(OpenTelemetry.Trace.Sampler rootSampler, OpenTelemetry.Trace.Sampler remoteParentSampled = null, OpenTelemetry.Trace.Sampler remoteParentNotSampled = null, OpenTelemetry.Trace.Sampler localParentSampled = null, OpenTelemetry.Trace.Sampler localParentNotSampled = null) -> void
OpenTelemetry.Trace.TracerProviderOptions
OpenTelemetry.Trace.TracerProviderOptions.SetErrorStatusOnException.get -> bool
OpenTelemetry.Trace.TracerProviderOptions.SetErrorStatusOnException.set -> void
OpenTelemetry.Trace.TracerProviderOptions.TracerProviderOptions() -> void
static OpenTelemetry.Sdk.CreateTracerProviderBuilder(System.Action<OpenTelemetry.Trace.TracerProviderOptions> configure = null) -> OpenTelemetry.Trace.TracerProviderBuilder
static OpenTelemetry.Trace.TracerProviderBuilderExtensions.AddLegacyActivity(this OpenTelemetry.Trace.TracerProviderBuilder tracerProviderBuilder, string operationName) -> OpenTelemetry.Trace.TracerProviderBuilder
static OpenTelemetry.Trace.TracerProviderExtensions.ForceFlush(this OpenTelemetry.Trace.TracerProvider provider, int timeoutMilliseconds = -1) -> bool
5 changes: 5 additions & 0 deletions src/OpenTelemetry/.publicApi/net46/PublicAPI.Unshipped.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
OpenTelemetry.Trace.ParentBasedSampler.ParentBasedSampler(OpenTelemetry.Trace.Sampler rootSampler, OpenTelemetry.Trace.Sampler remoteParentSampled = null, OpenTelemetry.Trace.Sampler remoteParentNotSampled = null, OpenTelemetry.Trace.Sampler localParentSampled = null, OpenTelemetry.Trace.Sampler localParentNotSampled = null) -> void
OpenTelemetry.Trace.TracerProviderOptions
OpenTelemetry.Trace.TracerProviderOptions.SetErrorStatusOnException.get -> bool
OpenTelemetry.Trace.TracerProviderOptions.SetErrorStatusOnException.set -> void
OpenTelemetry.Trace.TracerProviderOptions.TracerProviderOptions() -> void
static OpenTelemetry.Sdk.CreateTracerProviderBuilder(System.Action<OpenTelemetry.Trace.TracerProviderOptions> configure = null) -> OpenTelemetry.Trace.TracerProviderBuilder
static OpenTelemetry.Trace.TracerProviderBuilderExtensions.AddLegacyActivity(this OpenTelemetry.Trace.TracerProviderBuilder tracerProviderBuilder, string operationName) -> OpenTelemetry.Trace.TracerProviderBuilder
static OpenTelemetry.Trace.TracerProviderExtensions.ForceFlush(this OpenTelemetry.Trace.TracerProvider provider, int timeoutMilliseconds = -1) -> bool
5 changes: 5 additions & 0 deletions src/OpenTelemetry/.publicApi/net461/PublicAPI.Unshipped.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
OpenTelemetry.Trace.ParentBasedSampler.ParentBasedSampler(OpenTelemetry.Trace.Sampler rootSampler, OpenTelemetry.Trace.Sampler remoteParentSampled = null, OpenTelemetry.Trace.Sampler remoteParentNotSampled = null, OpenTelemetry.Trace.Sampler localParentSampled = null, OpenTelemetry.Trace.Sampler localParentNotSampled = null) -> void
OpenTelemetry.Trace.TracerProviderOptions
OpenTelemetry.Trace.TracerProviderOptions.SetErrorStatusOnException.get -> bool
OpenTelemetry.Trace.TracerProviderOptions.SetErrorStatusOnException.set -> void
OpenTelemetry.Trace.TracerProviderOptions.TracerProviderOptions() -> void
static OpenTelemetry.Sdk.CreateTracerProviderBuilder(System.Action<OpenTelemetry.Trace.TracerProviderOptions> configure = null) -> OpenTelemetry.Trace.TracerProviderBuilder
static OpenTelemetry.Trace.TracerProviderBuilderExtensions.AddLegacyActivity(this OpenTelemetry.Trace.TracerProviderBuilder tracerProviderBuilder, string operationName) -> OpenTelemetry.Trace.TracerProviderBuilder
static OpenTelemetry.Trace.TracerProviderExtensions.ForceFlush(this OpenTelemetry.Trace.TracerProvider provider, int timeoutMilliseconds = -1) -> bool
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
OpenTelemetry.Trace.ParentBasedSampler.ParentBasedSampler(OpenTelemetry.Trace.Sampler rootSampler, OpenTelemetry.Trace.Sampler remoteParentSampled = null, OpenTelemetry.Trace.Sampler remoteParentNotSampled = null, OpenTelemetry.Trace.Sampler localParentSampled = null, OpenTelemetry.Trace.Sampler localParentNotSampled = null) -> void
OpenTelemetry.Trace.TracerProviderOptions
OpenTelemetry.Trace.TracerProviderOptions.SetErrorStatusOnException.get -> bool
OpenTelemetry.Trace.TracerProviderOptions.SetErrorStatusOnException.set -> void
OpenTelemetry.Trace.TracerProviderOptions.TracerProviderOptions() -> void
static OpenTelemetry.Sdk.CreateTracerProviderBuilder(System.Action<OpenTelemetry.Trace.TracerProviderOptions> configure = null) -> OpenTelemetry.Trace.TracerProviderBuilder
static OpenTelemetry.Trace.TracerProviderBuilderExtensions.AddLegacyActivity(this OpenTelemetry.Trace.TracerProviderBuilder tracerProviderBuilder, string operationName) -> OpenTelemetry.Trace.TracerProviderBuilder
static OpenTelemetry.Trace.TracerProviderExtensions.ForceFlush(this OpenTelemetry.Trace.TracerProvider provider, int timeoutMilliseconds = -1) -> bool
9 changes: 5 additions & 4 deletions src/OpenTelemetry/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ please check the latest changes

## Unreleased

* Added `ForceFlush` to `TracerProvider`. ([#1837](https://github.com/open-telemetry/opentelemetry-dotnet/pull/1837))

* Added `TracerProviderOptions` and `SetErrorStatusOnException`.
([#1858](https://github.com/open-telemetry/opentelemetry-dotnet/pull/1858))
* Added `ForceFlush` to `TracerProvider`.
([#1837](https://github.com/open-telemetry/opentelemetry-dotnet/pull/1837))
* Added a TracerProvierBuilder extension method called
`AddLegacyActivityOperationName` which is used by instrumentation libraries
that use DiagnosticSource to get activities processed without
ActivitySourceAdapter.
[#1836](https://github.com/open-telemetry/opentelemetry-dotnet/pull/1836)

([#1836](https://github.com/open-telemetry/opentelemetry-dotnet/pull/1836))
* Added new constructor with optional parameters to allow customization of
`ParentBasedSampler` behavior. ([#1727](https://github.com/open-telemetry/opentelemetry-dotnet/pull/1727))

Expand Down
18 changes: 15 additions & 3 deletions src/OpenTelemetry/Sdk.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.
// </copyright>

using System;
using System.Diagnostics;
using OpenTelemetry.Context.Propagation;
using OpenTelemetry.Internal;
Expand Down Expand Up @@ -54,13 +55,24 @@ public static void SetDefaultTextMapPropagator(TextMapPropagator textMapPropagat
}

/// <summary>
/// Creates TracerProviderBuilder which should be used to build
/// TracerProvider.
/// Creates TracerProviderBuilder which should be used to build TracerProvider.
/// </summary>
/// <returns>TracerProviderBuilder instance, which should be used to build TracerProvider.</returns>
public static TracerProviderBuilder CreateTracerProviderBuilder()
{
return new TracerProviderBuilderSdk();
return CreateTracerProviderBuilder(null);
}

/// <summary>
/// Creates TracerProviderBuilder which should be used to build TracerProvider.
/// </summary>
/// <param name="configure">TracerProvider configuration options.</param>
/// <returns>TracerProviderBuilder instance, which should be used to build TracerProvider.</returns>
public static TracerProviderBuilder CreateTracerProviderBuilder(Action<TracerProviderOptions> configure = null)
{
var options = new TracerProviderOptions();
configure?.Invoke(options);
return new TracerProviderBuilderSdk(options);
}
}
}
80 changes: 80 additions & 0 deletions src/OpenTelemetry/Trace/ExceptionProcessor.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// <copyright file="ExceptionProcessor.cs" company="OpenTelemetry Authors">
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// </copyright>

using System;
using System.Diagnostics;
using System.Linq.Expressions;
using System.Reflection;
using System.Runtime.InteropServices;

namespace OpenTelemetry.Trace
{
internal class ExceptionProcessor : BaseProcessor<Activity>
{
private const string ExceptionPointersKey = "otel.exception_pointers";

private readonly Func<IntPtr> fnGetExceptionPointers;

public ExceptionProcessor()
{
try
{
var flags = BindingFlags.Static | BindingFlags.Public;
var method = typeof(Marshal).GetMethod("GetExceptionPointers", flags, null, new Type[] { }, null);
var lambda = Expression.Lambda<Func<IntPtr>>(Expression.Call(method));
this.fnGetExceptionPointers = lambda.Compile();
}
catch (Exception ex)
{
throw new NotSupportedException("System.Runtime.InteropServices.Marshal.GetExceptionPointers is not supported.", ex);
reyang marked this conversation as resolved.
Show resolved Hide resolved
}
}

/// <inheritdoc />
public override void OnStart(Activity activity)
{
var pointers = this.fnGetExceptionPointers();

if (pointers != IntPtr.Zero)
{
activity.SetTag(ExceptionPointersKey, pointers);
}
}

/// <inheritdoc />
public override void OnEnd(Activity activity)
reyang marked this conversation as resolved.
Show resolved Hide resolved
{
var pointers = this.fnGetExceptionPointers();

if (pointers == IntPtr.Zero)
{
return;
}

var snapshot = activity.GetTagValue(ExceptionPointersKey) as IntPtr?;
CodeBlanch marked this conversation as resolved.
Show resolved Hide resolved

if (snapshot != null)
{
activity.SetTag(ExceptionPointersKey, null);
}

if (snapshot != pointers)
reyang marked this conversation as resolved.
Show resolved Hide resolved
{
activity.SetStatus(Status.Error);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to get the type, message, and/or stack from the IntPtr returned from GetExceptionPointers?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it is possible by traveling through the SEH object. This is CPU/OS/platform dependent and not well documented.
https://bytepointer.com/resources/pietrek_crash_course_depths_of_win32_seh.htm.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks petty intense! Not sure if it is worth attempting.

Why I bring it up:

I was just thinking about the applications I have instrumented with OpenTelemetry .NET so far. There are really only a few spots I've had the need to create spans manually. Almost exclusively around queues/topics. In those cases, I'm doing what the example has, with one exception...

catch (Exception ex)
{
   activity.SetStatus(Status.Error.WithDescription(ex.Message));
   throw;
}

...instead of...

catch (Exception)
{
   activity.SetStatus(Status.Error);
   throw;
}

So, commenting from the perspective of a user of the library, since there's only a handful of spots where I have the try/catch, and the message/description is super useful, I'll probably continue doing the try/catch instead of using the processor/option. But if the processor could grab the message too, that would be something.

Copy link
Member Author

@reyang reyang Mar 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm happy to cover it in a follow up PR.
One heads up, the exceptions we captured here could be any SEH exception - e.g. C++ exception, OS exception.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm using #1874 to explore how deep should we go.

}
}
}
}
9 changes: 8 additions & 1 deletion src/OpenTelemetry/Trace/TracerProviderBuilderSdk.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,21 @@ internal class TracerProviderBuilderSdk : TracerProviderBuilder
{
private readonly List<InstrumentationFactory> instrumentationFactories = new List<InstrumentationFactory>();

private readonly TracerProviderOptions options;
private readonly List<BaseProcessor<Activity>> processors = new List<BaseProcessor<Activity>>();
private readonly List<string> sources = new List<string>();
private readonly Dictionary<string, bool> legacyActivityOperationNames = new Dictionary<string, bool>(StringComparer.OrdinalIgnoreCase);
private ResourceBuilder resourceBuilder = ResourceBuilder.CreateDefault();
private Sampler sampler = new ParentBasedSampler(new AlwaysOnSampler());

internal TracerProviderBuilderSdk()
internal TracerProviderBuilderSdk(TracerProviderOptions options)
{
this.options = options ?? new TracerProviderOptions();

if (options.SetErrorStatusOnException)
{
this.AddProcessor(new ExceptionProcessor());
reyang marked this conversation as resolved.
Show resolved Hide resolved
}
}

/// <summary>
Expand Down
Loading