From ba609f50751b1ce2d1e5b618d9e43d5d132e844e Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Thu, 21 May 2020 12:47:45 -0700 Subject: [PATCH 01/27] Update M.D.A to 0.4.0 (#528) --- src/csharp/Microsoft.Spark/Microsoft.Spark.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj b/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj index 643e1130c..f3d3f1ffd 100644 --- a/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj +++ b/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj @@ -27,7 +27,7 @@ - + From 08d203a3179919be314a0df1bdd4fad5d1f0b37d Mon Sep 17 00:00:00 2001 From: Niharika Dutta Date: Tue, 26 May 2020 19:45:47 -0700 Subject: [PATCH 02/27] [DOC] Adding guides to explain UDF serialization and Broadcast variable usage (#464) --- docs/broadcast-guide.md | 92 +++++++++++++++++++++ docs/udf-guide.md | 171 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 263 insertions(+) create mode 100644 docs/broadcast-guide.md create mode 100644 docs/udf-guide.md diff --git a/docs/broadcast-guide.md b/docs/broadcast-guide.md new file mode 100644 index 000000000..c3026516b --- /dev/null +++ b/docs/broadcast-guide.md @@ -0,0 +1,92 @@ +# Guide to using Broadcast Variables + +This is a guide to show how to use broadcast variables in .NET for Apache Spark. + +## What are Broadcast Variables + +[Broadcast variables in Apache Spark](https://spark.apache.org/docs/2.2.0/rdd-programming-guide.html#broadcast-variables) are a mechanism for sharing variables across executors that are meant to be read-only. They allow the programmer to keep a read-only variable cached on each machine rather than shipping a copy of it with tasks. They can be used, for example, to give every node a copy of a large input dataset in an efficient manner. + +### How to use broadcast variables in .NET for Apache Spark + +Broadcast variables are created from a variable `v` by calling `SparkContext.Broadcast(v)`. The broadcast variable is a wrapper around `v`, and its value can be accessed by calling the `Value()` method. + +Example: + +```csharp +string v = "Variable to be broadcasted"; +Broadcast bv = SparkContext.Broadcast(v); + +// Using the broadcast variable in a UDF: +Func udf = Udf( + str => $"{str}: {bv.Value()}"); +``` + +The type parameter for `Broadcast` should be the type of the variable being broadcasted. + +### Deleting broadcast variables + +The broadcast variable can be deleted from all executors by calling the `Destroy()` method on it. + +```csharp +// Destroying the broadcast variable bv: +bv.Destroy(); +``` + +> Note: `Destroy()` deletes all data and metadata related to the broadcast variable. Use this with caution - once a broadcast variable has been destroyed, it cannot be used again. + +#### Caveat of using Destroy + +One important thing to keep in mind while using broadcast variables in UDFs is to limit the scope of the variable to only the UDF that is referencing it. The [guide to using UDFs](udf-guide.md) describes this phenomenon in detail. This is especially crucial when calling `Destroy` on the broadcast variable. If the broadcast variable that has been destroyed is visible to or accessible from other UDFs, it gets picked up for serialization by all those UDFs, even if it is not being referenced by them. This will throw an error as .NET for Apache Spark is not able to serialize the destroyed broadcast variable. 
+ +Example to demonstrate: + +```csharp +string v = "Variable to be broadcasted"; +Broadcast bv = SparkContext.Broadcast(v); + +// Using the broadcast variable in a UDF: +Func udf1 = Udf( + str => $"{str}: {bv.Value()}"); + +// Destroying bv +bv.Destroy(); + +// Calling udf1 after destroying bv throws the following expected exception: +// org.apache.spark.SparkException: Attempted to use Broadcast(0) after it was destroyed +df.Select(udf1(df["_1"])).Show(); + +// Different UDF udf2 that is not referencing bv +Func udf2 = Udf( + str => $"{str}: not referencing broadcast variable"); + +// Calling udf2 throws the following (unexpected) exception: +// [Error] [JvmBridge] org.apache.spark.SparkException: Task not serializable +df.Select(udf2(df["_1"])).Show(); +``` + +The recommended way of implementing above desired behavior: + +```csharp +string v = "Variable to be broadcasted"; +// Restricting the visibility of bv to only the UDF referencing it +{ + Broadcast bv = SparkContext.Broadcast(v); + + // Using the broadcast variable in a UDF: + Func udf1 = Udf( + str => $"{str}: {bv.Value()}"); + + // Destroying bv + bv.Destroy(); +} + +// Different UDF udf2 that is not referencing bv +Func udf2 = Udf( + str => $"{str}: not referencing broadcast variable"); + +// Calling udf2 works fine as expected +df.Select(udf2(df["_1"])).Show(); +``` + This ensures that destroying `bv` doesn't affect calling `udf2` because of unexpected serialization behavior. + + Broadcast variables are useful for transmitting read-only data to all executors, as the data is sent only once and this can give performance benefits when compared with using local variables that get shipped to the executors with each task. Please refer to the [official documentation](https://spark.apache.org/docs/2.2.0/rdd-programming-guide.html#broadcast-variables) to get a deeper understanding of broadcast variables and why they are used. \ No newline at end of file diff --git a/docs/udf-guide.md b/docs/udf-guide.md new file mode 100644 index 000000000..6a2905bf4 --- /dev/null +++ b/docs/udf-guide.md @@ -0,0 +1,171 @@ +# Guide to User-Defined Functions (UDFs) + +This is a guide to show how to use UDFs in .NET for Apache Spark. + +## What are UDFs + +[User-Defined Functions (UDFs)](https://spark.apache.org/docs/latest/api/java/org/apache/spark/sql/expressions/UserDefinedFunction.html) are a feature of Spark that allow developers to use custom functions to extend the system's built-in functionality. They transform values from a single row within a table to produce a single corresponding output value per row based on the logic defined in the UDF. + +Let's take the following as an example for a UDF definition: + +```csharp +string s1 = "hello"; +Func udf = Udf( + str => $"{s1} {str}"); + +``` +The above defined UDF takes a `string` as an input (in the form of a [Column](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark/Sql/Column.cs#L14) of a [Dataframe](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark/Sql/DataFrame.cs#L24)), and returns a `string` with `hello` appended in front of the input. 
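To try the UDF out, we also need a `DataFrame` to apply it to. One possible way to build the sample `df` used in the rest of this guide is to load Spark's bundled `people.json` example file and keep only the `name` column. This is just a sketch: the file path and the `SparkSession` variable `spark` are assumptions, not part of the original example.

```csharp
// Sketch: assumes an existing SparkSession named `spark` and Spark's
// bundled example data file at the path below.
DataFrame df = spark
    .Read()
    .Json("examples/src/main/resources/people.json")
    .Select("name");
```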
+ +For a sample Dataframe, let's take the following Dataframe `df`: + +```text ++-------+ +| name| ++-------+ +|Michael| +| Andy| +| Justin| ++-------+ +``` + +Now let's apply the above defined `udf` to the dataframe `df`: + +```csharp +DataFrame udfResult = df.Select(udf(df["name"])); +``` + +This would return the below as the Dataframe `udfResult`: + +```text ++-------------+ +| name| ++-------------+ +|hello Michael| +| hello Andy| +| hello Justin| ++-------------+ +``` +To get a better understanding of how to implement UDFs, please take a look at the [UDF helper functions](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark/Sql/Functions.cs#L3616) and some [test examples](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark.E2ETest/UdfTests/UdfSimpleTypesTests.cs#L49). + +## UDF serialization + +Since UDFs are functions that need to be executed on the workers, they have to be serialized and sent to the workers as part of the payload from the driver. This involves serializing the [delegate](https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/delegates/) which is a reference to the method, along with its [target](https://docs.microsoft.com/en-us/dotnet/api/system.delegate.target?view=netframework-4.8) which is the class instance on which the current delegate invokes the instance method. Please take a look at this [code](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark/Utils/CommandSerDe.cs#L149) to get a better understanding of how UDF serialization is being done. + +## Good to know while implementing UDFs + +One behavior to be aware of while implementing UDFs in .NET for Apache Spark is how the target of the UDF gets serialized. .NET for Apache Spark uses .NET Core, which does not support serializing delegates, so it is instead done by using reflection to serialize the target where the delegate is defined. When multiple delegates are defined in a common scope, they have a shared closure that becomes the target of reflection for serialization. Let's take an example to illustrate what that means. + +The following code snippet defines two string variables that are being referenced in two function delegates that return the respective strings as result: + +```csharp +using System; + +public class C { + public void M() { + string s1 = "s1"; + string s2 = "s2"; + Func a = str => s1; + Func b = str => s2; + } +} +``` + +The above C# code generates the following C# disassembly (credit source: [sharplab.io](https://sharplab.io)) code from the compiler: + +```csharp +public class C +{ + [CompilerGenerated] + private sealed class <>c__DisplayClass0_0 + { + public string s1; + + public string s2; + + internal string b__0(string str) + { + return s1; + } + + internal string b__1(string str) + { + return s2; + } + } + + public void M() + { + <>c__DisplayClass0_0 <>c__DisplayClass0_ = new <>c__DisplayClass0_0(); + <>c__DisplayClass0_.s1 = "s1"; + <>c__DisplayClass0_.s2 = "s2"; + Func func = new Func(<>c__DisplayClass0_.b__0); + Func func2 = new Func(<>c__DisplayClass0_.b__1); + } +} +``` +As can be seen in the above decompiled code, both `func` and `func2` share the same closure `<>c__DisplayClass0_0`, which is the target that is serialized when serializing the delegates `func` and `func2`. Hence, even though `Func a` is only referencing `s1`, `s2` also gets serialized when sending over the bytes to the workers. 
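This shared closure is easy to observe outside of Spark. The following standalone sketch (an illustration added here, not part of the compiler output above) checks that both delegates share the same compiler-generated target and lists the captured fields that target carries:

```csharp
using System;
using System.Reflection;

public class ClosureInspection
{
    public static void Main()
    {
        string s1 = "s1";
        string s2 = "s2";
        Func<string, string> a = str => s1;
        Func<string, string> b = str => s2;

        // Both delegates point at the same compiler-generated closure instance.
        Console.WriteLine(ReferenceEquals(a.Target, b.Target)); // True

        // That single closure instance carries every captured variable,
        // s1 and s2 alike, so serializing a.Target also serializes s2.
        foreach (FieldInfo field in a.Target.GetType().GetFields())
        {
            Console.WriteLine($"{field.Name} = {field.GetValue(a.Target)}");
        }
    }
}
```

Running it prints `True` followed by both `s1` and `s2`, which is exactly why serializing the target of either delegate drags both captured variables along.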
+ +This can lead to some unexpected behaviors at runtime (like in the case of using [broadcast variables](broadcast-guide.md)), which is why we recommend restricting the visibility of the variables used in a function to that function's scope. + +Going back to the above example, the following is the recommended way to implement the desired behavior of previous code snippet: + +```csharp +using System; + +public class C { + public void M() { + { + string s1 = "s1"; + Func a = str => s1; + } + { + string s2 = "s2"; + Func b = str => s2; + } + } +} +``` + +The above C# code generates the following C# disassembly (credit source: [sharplab.io](https://sharplab.io)) code from the compiler: + +```csharp +public class C +{ + [CompilerGenerated] + private sealed class <>c__DisplayClass0_0 + { + public string s1; + + internal string b__0(string str) + { + return s1; + } + } + + [CompilerGenerated] + private sealed class <>c__DisplayClass0_1 + { + public string s2; + + internal string b__1(string str) + { + return s2; + } + } + + public void M() + { + <>c__DisplayClass0_0 <>c__DisplayClass0_ = new <>c__DisplayClass0_0(); + <>c__DisplayClass0_.s1 = "s1"; + Func func = new Func(<>c__DisplayClass0_.b__0); + <>c__DisplayClass0_1 <>c__DisplayClass0_2 = new <>c__DisplayClass0_1(); + <>c__DisplayClass0_2.s2 = "s2"; + Func func2 = new Func(<>c__DisplayClass0_2.b__1); + } +} +``` + +Here we see that `func` and `func2` no longer share a closure and have their own separate closures `<>c__DisplayClass0_0` and `<>c__DisplayClass0_1` respectively. When used as the target for serialization, nothing other than the referenced variables will get serialized for the delegate. + +This behavior is important to keep in mind while implementing multiple UDFs in a common scope. +To learn more about UDFs in general, please review the following articles that explain UDFs and how to use them: [UDFs in databricks(scala)](https://docs.databricks.com/spark/latest/spark-sql/udf-scala.html), [Spark UDFs and some gotchas](https://medium.com/@achilleus/spark-udfs-we-can-use-them-but-should-we-use-them-2c5a561fde6d). 
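As a closing illustration, here is one way the scoping advice above could look in a complete program. This is a minimal sketch rather than part of the original guide; the `SparkSession` setup and the input file path are assumptions.

```csharp
using System;
using Microsoft.Spark.Sql;
using static Microsoft.Spark.Sql.Functions;

class ScopedUdfExample
{
    static void Main()
    {
        SparkSession spark = SparkSession
            .Builder()
            .AppName("scoped-udf-example")
            .GetOrCreate();

        // Hypothetical input; any DataFrame with a string "name" column works here.
        DataFrame df = spark.Read().Json("examples/src/main/resources/people.json");

        Func<Column, Column> udf1;
        {
            // Keep the captured variable in the same narrow scope as the UDF that
            // references it, so nothing else is pulled into its serialized closure.
            string greeting = "hello";
            udf1 = Udf<string, string>(name => $"{greeting} {name}");
        }

        // A UDF defined outside that block does not share a closure with udf1.
        Func<Column, Column> udf2 = Udf<string, string>(name => name.ToUpper());

        df.Select(udf1(df["name"]), udf2(df["name"])).Show();

        spark.Stop();
    }
}
```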
\ No newline at end of file From ce2317774c25550ea3784a1af4f02befd2eea70b Mon Sep 17 00:00:00 2001 From: Steve Suh Date: Thu, 28 May 2020 19:29:27 -0700 Subject: [PATCH 03/27] Resolve nuget dependencies for UDFs defined in dotnet-interactive (#515) --- .../DeltaTableTests.cs | 1 + .../IpcTests/BroadcastTests.cs | 4 +- .../IpcTests/ML/Feature/BucketizerTests.cs | 2 +- .../IpcTests/ML/Feature/HashingTFTests.cs | 5 +- .../IpcTests/ML/Feature/IDFModelTests.cs | 2 +- .../IpcTests/ML/Feature/IDFTests.cs | 2 +- .../IpcTests/ML/Feature/TokenizerTests.cs | 2 +- .../IpcTests/ML/Feature/Word2VecModelTests.cs | 3 +- .../IpcTests/ML/Feature/Word2VecTests.cs | 2 +- .../IpcTests/SparkContextTests.cs | 2 +- .../IpcTests/Sql/DataFrameTests.cs | 2 +- .../IpcTests/Sql/DataFrameWriterTests.cs | 2 +- .../Sql/Streaming/DataStreamWriterTests.cs | 1 + .../Microsoft.Spark.E2ETest.csproj | 1 + .../Microsoft.Spark.E2ETest/SparkFixture.cs | 2 +- .../AssemblyLoaderTests.cs | 5 +- .../CollectionUtilsTests.cs | 26 +++ .../CommandSerDeTests.cs | 1 + .../DependencyProviderUtilsTests.cs | 154 ++++++++++++++++++ .../Microsoft.Spark.UnitTest/SparkFixture.cs | 109 +++++++++++++ .../Sql/ColumnTests.cs | 65 +------- .../TestUtils}/TemporaryDirectory.cs | 126 +++++++------- .../Microsoft.Spark.UnitTest/UdfSerDeTests.cs | 1 + .../DaemonWorkerTests.cs | 1 + .../DependencyProviderTests.cs | 64 ++++++++ .../Microsoft.Spark.Worker.UnitTest.csproj | 6 + .../PayloadProcessorTests.cs | 1 + .../Microsoft.Spark.Worker.csproj | 4 + .../Processor/PayloadProcessor.cs | 30 +--- .../Utils/AssemblyLoaderHelper.cs | 93 +++++++++++ .../Utils/DependencyProvider.cs | 87 ++++++++++ .../Interop/Ipc/IJvmBridgeFactory.cs | 11 ++ .../Interop/Ipc/JvmBridgeFactory.cs | 14 ++ .../Interop/SparkEnvironment.cs | 25 ++- .../Microsoft.Spark/ML/Feature/Bucketizer.cs | 2 +- src/csharp/Microsoft.Spark/SparkFiles.cs | 39 ++++- .../Microsoft.Spark/Utils/AssemblyLoader.cs | 50 ++++-- .../Microsoft.Spark/Utils/CollectionUtils.cs | 18 ++ .../Utils/DependencyProviderUtils.cs | 99 +++++++++++ src/csharp/Microsoft.Spark/Utils/UdfSerDe.cs | 19 ++- src/csharp/Microsoft.Spark/Utils/UdfUtils.cs | 23 +-- 41 files changed, 898 insertions(+), 208 deletions(-) create mode 100644 src/csharp/Microsoft.Spark.UnitTest/CollectionUtilsTests.cs create mode 100644 src/csharp/Microsoft.Spark.UnitTest/DependencyProviderUtilsTests.cs create mode 100644 src/csharp/Microsoft.Spark.UnitTest/SparkFixture.cs rename src/csharp/{Microsoft.Spark.E2ETest/Utils => Microsoft.Spark.UnitTest/TestUtils}/TemporaryDirectory.cs (87%) create mode 100644 src/csharp/Microsoft.Spark.Worker.UnitTest/DependencyProviderTests.cs create mode 100644 src/csharp/Microsoft.Spark.Worker/Utils/AssemblyLoaderHelper.cs create mode 100644 src/csharp/Microsoft.Spark.Worker/Utils/DependencyProvider.cs create mode 100644 src/csharp/Microsoft.Spark/Interop/Ipc/IJvmBridgeFactory.cs create mode 100644 src/csharp/Microsoft.Spark/Interop/Ipc/JvmBridgeFactory.cs create mode 100644 src/csharp/Microsoft.Spark/Utils/CollectionUtils.cs create mode 100644 src/csharp/Microsoft.Spark/Utils/DependencyProviderUtils.cs diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaTableTests.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaTableTests.cs index 69249d8c5..fab7c74dc 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaTableTests.cs +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaTableTests.cs @@ -11,6 +11,7 @@ using 
Microsoft.Spark.Sql; using Microsoft.Spark.Sql.Streaming; using Microsoft.Spark.Sql.Types; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.Extensions.Delta.E2ETest diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/BroadcastTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/BroadcastTests.cs index 000c8f27e..511f5a122 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/BroadcastTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/BroadcastTests.cs @@ -1,10 +1,8 @@ using System; -using System.Collections.Generic; using System.Linq; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.Sql; -using static Microsoft.Spark.Sql.Functions; using Xunit; +using static Microsoft.Spark.Sql.Functions; namespace Microsoft.Spark.E2ETest.IpcTests { diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs index 11037bc6d..a075334de 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs @@ -4,9 +4,9 @@ using System.Collections.Generic; using System.IO; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.ML.Feature; using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/HashingTFTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/HashingTFTests.cs index 7b6882bea..df459ed7a 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/HashingTFTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/HashingTFTests.cs @@ -2,13 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; -using System.Collections.Generic; using System.IO; -using System.Linq; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.ML.Feature; using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFModelTests.cs index 623b7322c..202187809 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFModelTests.cs @@ -3,9 +3,9 @@ // See the LICENSE file in the project root for more information. using System.IO; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.ML.Feature; using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFTests.cs index 3dea63de7..72da97887 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFTests.cs @@ -3,9 +3,9 @@ // See the LICENSE file in the project root for more information. 
using System.IO; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.ML.Feature; using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/TokenizerTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/TokenizerTests.cs index 8cdb4e03a..4b1998f50 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/TokenizerTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/TokenizerTests.cs @@ -3,9 +3,9 @@ // See the LICENSE file in the project root for more information. using System.IO; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.ML.Feature; using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecModelTests.cs index 4845e011a..a5227149b 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecModelTests.cs @@ -2,11 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using System.IO; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.ML.Feature; using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecTests.cs index 30e14ed28..1d5da5335 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecTests.cs @@ -3,9 +3,9 @@ // See the LICENSE file in the project root for more information. using System.IO; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.ML.Feature; using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs index 07fbf2372..ca752570a 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs @@ -3,7 +3,7 @@ // See the LICENSE file in the project root for more information. using System; -using Microsoft.Spark.E2ETest.Utils; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest.IpcTests diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameTests.cs index 7359bdb6b..46e899a87 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameTests.cs @@ -3,13 +3,13 @@ // See the LICENSE file in the project root for more information. 
using System; -using System.Collections.Generic; using System.Linq; using Apache.Arrow; using Microsoft.Data.Analysis; using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.Sql; using Microsoft.Spark.Sql.Types; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; using static Microsoft.Spark.Sql.Functions; using static Microsoft.Spark.UnitTest.TestUtils.ArrowTestUtils; diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameWriterTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameWriterTests.cs index a7e214160..4f0d06742 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameWriterTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameWriterTests.cs @@ -3,8 +3,8 @@ // See the LICENSE file in the project root for more information. using System.Collections.Generic; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest.IpcTests diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs index 4e87dc6c6..15c2a22a7 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs @@ -10,6 +10,7 @@ using Microsoft.Spark.Sql; using Microsoft.Spark.Sql.Streaming; using Microsoft.Spark.Sql.Types; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; using static Microsoft.Spark.Sql.Functions; diff --git a/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj b/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj index abe436ec9..e03519853 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj +++ b/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj @@ -23,6 +23,7 @@ + diff --git a/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs b/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs index fc8272c5b..6d8dadbac 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs @@ -7,9 +7,9 @@ using System.IO; using System.Reflection; using System.Runtime.InteropServices; -using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.Interop.Ipc; using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; using Xunit; namespace Microsoft.Spark.E2ETest diff --git a/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs b/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs index da7d05197..f2f0dd30e 100644 --- a/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs +++ b/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs @@ -9,17 +9,19 @@ namespace Microsoft.Spark.UnitTest { + [Collection("Spark Unit Tests")] public class AssemblyLoaderTests { [Fact] public void TestAssemblySearchPathResolver() { + string sparkFilesDir = SparkFiles.GetRootDirectory(); string curDir = Directory.GetCurrentDirectory(); string appDir = AppDomain.CurrentDomain.BaseDirectory; // Test the default scenario. string[] searchPaths = AssemblySearchPathResolver.GetAssemblySearchPaths(); - Assert.Equal(new[] { curDir, appDir }, searchPaths); + Assert.Equal(new[] { sparkFilesDir, curDir, appDir }, searchPaths); // Test the case where DOTNET_ASSEMBLY_SEARCH_PATHS is defined. 
char sep = Path.PathSeparator; @@ -34,6 +36,7 @@ public void TestAssemblySearchPathResolver() "mydir2", Path.Combine(curDir, $".{sep}mydir3"), Path.Combine(curDir, $".{sep}mydir4"), + sparkFilesDir, curDir, appDir }, searchPaths); diff --git a/src/csharp/Microsoft.Spark.UnitTest/CollectionUtilsTests.cs b/src/csharp/Microsoft.Spark.UnitTest/CollectionUtilsTests.cs new file mode 100644 index 000000000..9a723b2b5 --- /dev/null +++ b/src/csharp/Microsoft.Spark.UnitTest/CollectionUtilsTests.cs @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.Spark.Utils; +using Xunit; + +namespace Microsoft.Spark.UnitTest +{ + public class CollectionUtilsTests + { + [Fact] + public void TestArrayEquals() + { + Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, null)); + Assert.False(CollectionUtils.ArrayEquals(null, new int[] { 1 })); + Assert.False(CollectionUtils.ArrayEquals(new int[] { }, new int[] { 1 })); + Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { })); + Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { 1, 2 })); + Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { 2 })); + + Assert.True(CollectionUtils.ArrayEquals(null, null)); + Assert.True(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { 1 })); + } + } +} diff --git a/src/csharp/Microsoft.Spark.UnitTest/CommandSerDeTests.cs b/src/csharp/Microsoft.Spark.UnitTest/CommandSerDeTests.cs index 557bdcc72..820d7dea0 100644 --- a/src/csharp/Microsoft.Spark.UnitTest/CommandSerDeTests.cs +++ b/src/csharp/Microsoft.Spark.UnitTest/CommandSerDeTests.cs @@ -14,6 +14,7 @@ namespace Microsoft.Spark.UnitTest { + [Collection("Spark Unit Tests")] public class CommandSerDeTests { [Fact] diff --git a/src/csharp/Microsoft.Spark.UnitTest/DependencyProviderUtilsTests.cs b/src/csharp/Microsoft.Spark.UnitTest/DependencyProviderUtilsTests.cs new file mode 100644 index 000000000..ccfc4890b --- /dev/null +++ b/src/csharp/Microsoft.Spark.UnitTest/DependencyProviderUtilsTests.cs @@ -0,0 +1,154 @@ +using System.IO; +using System.Linq; +using Microsoft.Spark.UnitTest.TestUtils; +using Microsoft.Spark.Utils; +using Xunit; + +namespace Microsoft.Spark.UnitTest +{ + public class DependencyProviderUtilsTests + { + [Fact] + public void TestNuGetMetadataEquals() + { + string expectedFileName = "package.name.1.0.0.nupkg"; + string expectedPackageName = "package.name"; + string expectedPackageVersion = "1.0.0"; + + var nugetMetadata = new DependencyProviderUtils.NuGetMetadata + { + FileName = expectedFileName, + PackageName = expectedPackageName, + PackageVersion = expectedPackageVersion + }; + + Assert.False(nugetMetadata.Equals(null)); + Assert.False(nugetMetadata.Equals(new DependencyProviderUtils.NuGetMetadata())); + Assert.False(nugetMetadata.Equals(new DependencyProviderUtils.NuGetMetadata + { + FileName = "", + PackageName = expectedPackageName, + PackageVersion = expectedPackageVersion + })); + Assert.False(nugetMetadata.Equals(new DependencyProviderUtils.NuGetMetadata + { + FileName = expectedFileName, + PackageName = "", + PackageVersion = expectedPackageVersion + })); + Assert.False(nugetMetadata.Equals(new DependencyProviderUtils.NuGetMetadata + { + FileName = expectedFileName, + PackageName = expectedPackageName, + PackageVersion = "" + })); + + Assert.True(nugetMetadata.Equals(new 
DependencyProviderUtils.NuGetMetadata + { + FileName = expectedFileName, + PackageName = expectedPackageName, + PackageVersion = expectedPackageVersion + })); + } + + [Fact] + public void TestMetadataEquals() + { + string expectedAssemblyProbingPath = "/assembly/probe/path"; + string expectedNativeProbingPath = "/native/probe/path"; + var expectedNugetMetadata = new DependencyProviderUtils.NuGetMetadata + { + FileName = "package.name.1.0.0.nupkg", + PackageName = "package.name", + PackageVersion = "1.0.0" + }; + + var metadata = new DependencyProviderUtils.Metadata + { + AssemblyProbingPaths = new string[] { expectedAssemblyProbingPath }, + NativeProbingPaths = new string[] { expectedNativeProbingPath }, + NuGets = new DependencyProviderUtils.NuGetMetadata[] { expectedNugetMetadata } + }; + + Assert.False(metadata.Equals(null)); + Assert.False(metadata.Equals(new DependencyProviderUtils.Metadata())); + Assert.False(metadata.Equals(new DependencyProviderUtils.Metadata + { + AssemblyProbingPaths = new string[] { expectedAssemblyProbingPath }, + NativeProbingPaths = new string[] { expectedNativeProbingPath, "" }, + NuGets = new DependencyProviderUtils.NuGetMetadata[] { expectedNugetMetadata } + })); + Assert.False(metadata.Equals(new DependencyProviderUtils.Metadata + { + AssemblyProbingPaths = new string[] { expectedAssemblyProbingPath }, + NativeProbingPaths = new string[] { expectedNativeProbingPath }, + NuGets = new DependencyProviderUtils.NuGetMetadata[] { expectedNugetMetadata, null } + })); + Assert.False(metadata.Equals(new DependencyProviderUtils.Metadata + { + AssemblyProbingPaths = new string[] { expectedAssemblyProbingPath, "" }, + NativeProbingPaths = new string[] { expectedNativeProbingPath }, + NuGets = new DependencyProviderUtils.NuGetMetadata[] { expectedNugetMetadata } + })); + + Assert.True(metadata.Equals(new DependencyProviderUtils.Metadata + { + AssemblyProbingPaths = new string[] { expectedAssemblyProbingPath }, + NativeProbingPaths = new string[] { expectedNativeProbingPath }, + NuGets = new DependencyProviderUtils.NuGetMetadata[] { expectedNugetMetadata } + })); + } + + [Fact] + public void TestMetadataSerDe() + { + using var tempDir = new TemporaryDirectory(); + var metadata = new DependencyProviderUtils.Metadata + { + AssemblyProbingPaths = new string[] { "/assembly/probe/path" }, + NativeProbingPaths = new string[] { "/native/probe/path" }, + NuGets = new DependencyProviderUtils.NuGetMetadata[] + { + new DependencyProviderUtils.NuGetMetadata + { + FileName = "package.name.1.0.0.nupkg", + PackageName = "package.name", + PackageVersion = "1.0.0" + } + } + }; + + string serializedFilePath = Path.Combine(tempDir.Path, "serializedMetadata"); + metadata.Serialize(serializedFilePath); + + DependencyProviderUtils.Metadata deserializedMetadata = + DependencyProviderUtils.Metadata.Deserialize(serializedFilePath); + + Assert.True(metadata.Equals(deserializedMetadata)); + } + + [Fact] + public void TestFileNames() + { + using var tempDir = new TemporaryDirectory(); + foreach (ulong num in Enumerable.Range(0, 3).Select(x => System.Math.Pow(10, x))) + { + string filePath = + Path.Combine(tempDir.Path, DependencyProviderUtils.CreateFileName(num)); + File.Create(filePath).Dispose(); + } + + var expectedFiles = new string[] + { + "dependencyProviderMetadata_00000000000000000001", + "dependencyProviderMetadata_00000000000000000010", + "dependencyProviderMetadata_00000000000000000100", + }; + IOrderedEnumerable actualFiles = DependencyProviderUtils + .GetMetadataFiles(tempDir.Path) + 
.Select(f => Path.GetFileName(f)) + .OrderBy(s => s); + Assert.True(expectedFiles.SequenceEqual(actualFiles)); + } + } +} diff --git a/src/csharp/Microsoft.Spark.UnitTest/SparkFixture.cs b/src/csharp/Microsoft.Spark.UnitTest/SparkFixture.cs new file mode 100644 index 000000000..02f2c8b3b --- /dev/null +++ b/src/csharp/Microsoft.Spark.UnitTest/SparkFixture.cs @@ -0,0 +1,109 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.IO; +using Microsoft.Spark.Interop; +using Microsoft.Spark.Interop.Ipc; +using Moq; +using Xunit; + +namespace Microsoft.Spark.UnitTest +{ + public sealed class SparkFixture : IDisposable + { + internal Mock MockJvm { get; private set; } + + public SparkFixture() + { + SetupBasicMockJvm(); + + // Unit tests may contain calls that hit the AssemblyLoader. + // One of the AssemblyLoader assembly search paths is populated + // using SparkFiles. Unless we are running in an E2E scenario and + // on the Worker, SparkFiles will attempt to call the JVM. Because + // this is a (non E2E) Unit test, it is necessary to mock this call. + SetupSparkFiles(); + + var mockJvmBridgeFactory = new Mock(); + mockJvmBridgeFactory + .Setup(m => m.Create(It.IsAny())) + .Returns(MockJvm.Object); + + SparkEnvironment.JvmBridgeFactory = mockJvmBridgeFactory.Object; + } + + public void Dispose() + { + } + + private void SetupBasicMockJvm() + { + MockJvm = new Mock(); + + MockJvm + .Setup(m => m.CallStaticJavaMethod( + It.IsAny(), + It.IsAny(), + It.IsAny())) + .Returns( + new JvmObjectReference("result", MockJvm.Object)); + MockJvm + .Setup(m => m.CallStaticJavaMethod( + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny())) + .Returns( + new JvmObjectReference("result", MockJvm.Object)); + MockJvm + .Setup(m => m.CallStaticJavaMethod( + It.IsAny(), + It.IsAny(), + It.IsAny())) + .Returns( + new JvmObjectReference("result", MockJvm.Object)); + + MockJvm + .Setup(m => m.CallNonStaticJavaMethod( + It.IsAny(), + It.IsAny(), + It.IsAny())) + .Returns( + new JvmObjectReference("result", MockJvm.Object)); + MockJvm + .Setup(m => m.CallNonStaticJavaMethod( + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny())) + .Returns( + new JvmObjectReference("result", MockJvm.Object)); + MockJvm + .Setup(m => m.CallNonStaticJavaMethod( + It.IsAny(), + It.IsAny(), + It.IsAny())) + .Returns( + new JvmObjectReference("result", MockJvm.Object)); + } + + private void SetupSparkFiles() + { + MockJvm + .Setup(m => m.CallStaticJavaMethod( + "org.apache.spark.SparkFiles", + "getRootDirectory")) + .Returns("SparkFilesRootDirectory"); + } + } + + [CollectionDefinition("Spark Unit Tests")] + public class SparkCollection : ICollectionFixture + { + // This class has no code, and is never created. Its purpose is simply + // to be the place to apply [CollectionDefinition] and all the + // ICollectionFixture<> interfaces. 
+ } +} diff --git a/src/csharp/Microsoft.Spark.UnitTest/Sql/ColumnTests.cs b/src/csharp/Microsoft.Spark.UnitTest/Sql/ColumnTests.cs index f88d53800..adffd9312 100644 --- a/src/csharp/Microsoft.Spark.UnitTest/Sql/ColumnTests.cs +++ b/src/csharp/Microsoft.Spark.UnitTest/Sql/ColumnTests.cs @@ -12,71 +12,12 @@ namespace Microsoft.Spark.UnitTest { - public class ColumnTestsFixture : IDisposable - { - internal Mock MockJvm { get; } - - public ColumnTestsFixture() - { - MockJvm = new Mock(); - - MockJvm - .Setup(m => m.CallStaticJavaMethod( - It.IsAny(), - It.IsAny(), - It.IsAny())) - .Returns( - new JvmObjectReference("result", MockJvm.Object)); - MockJvm - .Setup(m => m.CallStaticJavaMethod( - It.IsAny(), - It.IsAny(), - It.IsAny(), - It.IsAny())) - .Returns( - new JvmObjectReference("result", MockJvm.Object)); - MockJvm - .Setup(m => m.CallStaticJavaMethod( - It.IsAny(), - It.IsAny(), - It.IsAny())) - .Returns( - new JvmObjectReference("result", MockJvm.Object)); - - MockJvm - .Setup(m => m.CallNonStaticJavaMethod( - It.IsAny(), - It.IsAny(), - It.IsAny())) - .Returns( - new JvmObjectReference("result", MockJvm.Object)); - MockJvm - .Setup(m => m.CallNonStaticJavaMethod( - It.IsAny(), - It.IsAny(), - It.IsAny(), - It.IsAny())) - .Returns( - new JvmObjectReference("result", MockJvm.Object)); - MockJvm - .Setup(m => m.CallNonStaticJavaMethod( - It.IsAny(), - It.IsAny(), - It.IsAny())) - .Returns( - new JvmObjectReference("result", MockJvm.Object)); - } - - public void Dispose() - { - } - } - - public class ColumnTests : IClassFixture + [Collection("Spark Unit Tests")] + public class ColumnTests { private readonly Mock _mockJvm; - public ColumnTests(ColumnTestsFixture fixture) + public ColumnTests(SparkFixture fixture) { _mockJvm = fixture.MockJvm; } diff --git a/src/csharp/Microsoft.Spark.E2ETest/Utils/TemporaryDirectory.cs b/src/csharp/Microsoft.Spark.UnitTest/TestUtils/TemporaryDirectory.cs similarity index 87% rename from src/csharp/Microsoft.Spark.E2ETest/Utils/TemporaryDirectory.cs rename to src/csharp/Microsoft.Spark.UnitTest/TestUtils/TemporaryDirectory.cs index 556b78f99..98d3c18f3 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/Utils/TemporaryDirectory.cs +++ b/src/csharp/Microsoft.Spark.UnitTest/TestUtils/TemporaryDirectory.cs @@ -1,63 +1,63 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.IO; - -namespace Microsoft.Spark.E2ETest.Utils -{ - /// - /// Creates a temporary folder that is automatically cleaned up when disposed. - /// - internal sealed class TemporaryDirectory : IDisposable - { - private bool disposed = false; - - /// - /// Path to temporary folder. - /// - public string Path { get; } - - public TemporaryDirectory() - { - Path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString()); - Cleanup(); - Directory.CreateDirectory(Path); - Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}"; - } - - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - - private void Cleanup() - { - if (File.Exists(Path)) - { - File.Delete(Path); - } - else if (Directory.Exists(Path)) - { - Directory.Delete(Path, true); - } - } - - private void Dispose(bool disposing) - { - if (disposed) - { - return; - } - - if (disposing) - { - Cleanup(); - } - - disposed = true; - } - } -} +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.IO; + +namespace Microsoft.Spark.UnitTest.TestUtils +{ + /// + /// Creates a temporary folder that is automatically cleaned up when disposed. + /// + internal sealed class TemporaryDirectory : IDisposable + { + private bool _disposed = false; + + /// + /// Path to temporary folder. + /// + public string Path { get; } + + public TemporaryDirectory() + { + Path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString()); + Cleanup(); + Directory.CreateDirectory(Path); + Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}"; + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + private void Cleanup() + { + if (File.Exists(Path)) + { + File.Delete(Path); + } + else if (Directory.Exists(Path)) + { + Directory.Delete(Path, true); + } + } + + private void Dispose(bool disposing) + { + if (_disposed) + { + return; + } + + if (disposing) + { + Cleanup(); + } + + _disposed = true; + } + } +} diff --git a/src/csharp/Microsoft.Spark.UnitTest/UdfSerDeTests.cs b/src/csharp/Microsoft.Spark.UnitTest/UdfSerDeTests.cs index 6928150d0..bf4ef29f4 100644 --- a/src/csharp/Microsoft.Spark.UnitTest/UdfSerDeTests.cs +++ b/src/csharp/Microsoft.Spark.UnitTest/UdfSerDeTests.cs @@ -11,6 +11,7 @@ namespace Microsoft.Spark.UnitTest { + [Collection("Spark Unit Tests")] public class UdfSerDeTests { [Serializable] diff --git a/src/csharp/Microsoft.Spark.Worker.UnitTest/DaemonWorkerTests.cs b/src/csharp/Microsoft.Spark.Worker.UnitTest/DaemonWorkerTests.cs index 0490660e3..5fac38035 100644 --- a/src/csharp/Microsoft.Spark.Worker.UnitTest/DaemonWorkerTests.cs +++ b/src/csharp/Microsoft.Spark.Worker.UnitTest/DaemonWorkerTests.cs @@ -15,6 +15,7 @@ namespace Microsoft.Spark.Worker.UnitTest { + [Collection("Spark Unit Tests")] public class DaemonWorkerTests { [Fact] diff --git a/src/csharp/Microsoft.Spark.Worker.UnitTest/DependencyProviderTests.cs b/src/csharp/Microsoft.Spark.Worker.UnitTest/DependencyProviderTests.cs new file mode 100644 index 000000000..6643ba2ab --- /dev/null +++ b/src/csharp/Microsoft.Spark.Worker.UnitTest/DependencyProviderTests.cs @@ -0,0 +1,64 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.IO; +using System.IO.Compression; +using Microsoft.Spark.UnitTest.TestUtils; +using Microsoft.Spark.Utils; +using Microsoft.Spark.Worker.Utils; +using Xunit; + +namespace Microsoft.Spark.Worker.UnitTest +{ + [Collection("Spark Unit Tests")] + public class DependencyProviderTests + { + [Fact] + public void TestsUnpackPackages() + { + string packageFileName = "package.name.1.0.0.nupkg"; + string packageName = "package.name"; + string packageVersion = "1.0.0"; + + using var emptyFileDir = new TemporaryDirectory(); + string emptyFileName = "emptyfile"; + File.Create(Path.Combine(emptyFileDir.Path, emptyFileName)).Dispose(); + + using var nupkgDir = new TemporaryDirectory(); + ZipFile.CreateFromDirectory( + emptyFileDir.Path, + Path.Combine(nupkgDir.Path, packageFileName)); + + var metadata = new DependencyProviderUtils.Metadata + { + AssemblyProbingPaths = new string[] { "/assembly/probe/path" }, + NativeProbingPaths = new string[] { "/native/probe/path" }, + NuGets = new DependencyProviderUtils.NuGetMetadata[] + { + new DependencyProviderUtils.NuGetMetadata + { + FileName = packageFileName, + PackageName = packageName, + PackageVersion = packageVersion + } + } + }; + + using var unpackDir = new TemporaryDirectory(); + string metadataFilePath = + Path.Combine(nupkgDir.Path, DependencyProviderUtils.CreateFileName(1)); + metadata.Serialize(metadataFilePath); + + // Files located in nupkgDir + // nuget: package.name.1.0.0.nupkg + // metadata file: dependencyProviderMetadata_00000000000000000001 + var dependencyProvider = + new DependencyProvider(metadataFilePath, nupkgDir.Path, unpackDir.Path); + string expectedPackagePath = + Path.Combine(unpackDir.Path, ".nuget", "packages", packageName, packageVersion); + string expectedFilePath = Path.Combine(expectedPackagePath, emptyFileName); + Assert.True(File.Exists(expectedFilePath)); + } + } +} diff --git a/src/csharp/Microsoft.Spark.Worker.UnitTest/Microsoft.Spark.Worker.UnitTest.csproj b/src/csharp/Microsoft.Spark.Worker.UnitTest/Microsoft.Spark.Worker.UnitTest.csproj index 1b68d2e45..1371d5d1b 100644 --- a/src/csharp/Microsoft.Spark.Worker.UnitTest/Microsoft.Spark.Worker.UnitTest.csproj +++ b/src/csharp/Microsoft.Spark.Worker.UnitTest/Microsoft.Spark.Worker.UnitTest.csproj @@ -4,13 +4,19 @@ netcoreapp3.1 + + + + + + diff --git a/src/csharp/Microsoft.Spark.Worker.UnitTest/PayloadProcessorTests.cs b/src/csharp/Microsoft.Spark.Worker.UnitTest/PayloadProcessorTests.cs index c586e9dc2..24370abcb 100644 --- a/src/csharp/Microsoft.Spark.Worker.UnitTest/PayloadProcessorTests.cs +++ b/src/csharp/Microsoft.Spark.Worker.UnitTest/PayloadProcessorTests.cs @@ -14,6 +14,7 @@ namespace Microsoft.Spark.Worker.UnitTest { + [Collection("Spark Unit Tests")] public class PayloadProcessorTests { [Theory] diff --git a/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj b/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj index cff20b084..1be21a7ac 100644 --- a/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj +++ b/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj @@ -6,6 +6,9 @@ netcoreapp3.1 Microsoft.Spark.Worker true + + + https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet3.1/nuget/v3/index.json @@ -13,6 +16,7 @@ + diff --git a/src/csharp/Microsoft.Spark.Worker/Processor/PayloadProcessor.cs b/src/csharp/Microsoft.Spark.Worker/Processor/PayloadProcessor.cs index 2acc89933..58dd588aa 100644 --- a/src/csharp/Microsoft.Spark.Worker/Processor/PayloadProcessor.cs +++ 
b/src/csharp/Microsoft.Spark.Worker/Processor/PayloadProcessor.cs @@ -7,12 +7,7 @@ using System.Collections.Generic; using System.IO; using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Utils; - -#if NETCOREAPP -using System.Reflection; -using System.Runtime.Loader; -#endif +using Microsoft.Spark.Worker.Utils; namespace Microsoft.Spark.Worker.Processor { @@ -28,20 +23,6 @@ internal PayloadProcessor(Version version) _version = version; } - static PayloadProcessor() - { -#if NETCOREAPP - AssemblyLoader.LoadFromFile = AssemblyLoadContext.Default.LoadFromAssemblyPath; - AssemblyLoader.LoadFromName = (asmName) => - AssemblyLoadContext.Default.LoadFromAssemblyName(new AssemblyName(asmName)); - AssemblyLoadContext.Default.Resolving += (assemblyLoadContext, assemblyName) => - AssemblyLoader.ResolveAssembly(assemblyName.FullName); -#else - AppDomain.CurrentDomain.AssemblyResolve += (object sender, ResolveEventArgs args) => - AssemblyLoader.ResolveAssembly(args.Name); -#endif - } - /// /// Processes the given stream to construct a Payload object. /// @@ -79,8 +60,15 @@ internal Payload Process(Stream stream) TaskContextHolder.Set(payload.TaskContext); payload.SparkFilesDir = SerDe.ReadString(stream); + SparkFiles.SetRootDirectory(payload.SparkFilesDir); + + // Register additional assembly handlers after SparkFilesDir has been set + // and before any deserialization occurs. BroadcastVariableProcessor may + // deserialize objects from assemblies that are not currently loaded within + // our current context. + AssemblyLoaderHelper.RegisterAssemblyHandler(); - if (Utils.SettingUtils.IsDatabricks) + if (SettingUtils.IsDatabricks) { SerDe.ReadString(stream); SerDe.ReadString(stream); diff --git a/src/csharp/Microsoft.Spark.Worker/Utils/AssemblyLoaderHelper.cs b/src/csharp/Microsoft.Spark.Worker/Utils/AssemblyLoaderHelper.cs new file mode 100644 index 000000000..b74228073 --- /dev/null +++ b/src/csharp/Microsoft.Spark.Worker/Utils/AssemblyLoaderHelper.cs @@ -0,0 +1,93 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Concurrent; +using System.IO; +using Microsoft.Spark.Services; +using Microsoft.Spark.Utils; + +#if NETCOREAPP +using System.Runtime.Loader; +#endif + +namespace Microsoft.Spark.Worker.Utils +{ + internal static class AssemblyLoaderHelper + { + private static readonly ILoggerService s_logger = + LoggerServiceFactory.GetLogger(typeof(AssemblyLoaderHelper)); + + // A mapping between a metadata file's path to its respective DependencyProvider. + private static readonly ConcurrentDictionary> + s_dependencyProviders = new ConcurrentDictionary>(); + + private static readonly bool s_runningREPL = + EnvironmentUtils.GetEnvironmentVariableAsBool("DOTNET_SPARK_RUNNING_REPL"); + + /// + /// Register the AssemblyLoader.ResolveAssembly handler to handle the + /// event when assemblies fail to load in the current assembly load context. 
+ /// + static AssemblyLoaderHelper() + { +#if NETCOREAPP + AssemblyLoader.LoadFromFile = AssemblyLoadContext.Default.LoadFromAssemblyPath; + AssemblyLoadContext.Default.Resolving += (assemblyLoadContext, assemblyName) => + AssemblyLoader.ResolveAssembly(assemblyName.FullName); +#else + AppDomain.CurrentDomain.AssemblyResolve += (object sender, ResolveEventArgs args) => + AssemblyLoader.ResolveAssembly(args.Name); +#endif + } + + /// + /// In a dotnet-interactive REPL session (driver), nuget dependencies will be + /// systematically added using . + /// + /// These files include: + /// - "{packagename}.{version}.nupkg" + /// The nuget packages + /// - + /// Serialized object. + /// + /// On the Worker, in order to resolve the nuget dependencies referenced by + /// the dotnet-interactive session, we instantiate a + /// . + /// This provider will register an event handler to the Assembly Load Resolving event. + /// By using , we can access the + /// required files added to the . + /// + internal static void RegisterAssemblyHandler() + { + if (!s_runningREPL) + { + return; + } + + string sparkFilesPath = SparkFiles.GetRootDirectory(); + string[] metadataFiles = + DependencyProviderUtils.GetMetadataFiles(sparkFilesPath); + foreach (string metdatafile in metadataFiles) + { + // The execution of the delegate passed to GetOrAdd is not guaranteed to run once. + // Multiple Lazy objects may be created, but only one of them will be added to the + // ConcurrentDictionary. The Lazy value is retrieved to materialize the + // DependencyProvider object if it hasn't already been created. + Lazy dependecyProvider = s_dependencyProviders.GetOrAdd( + metdatafile, + mdf => new Lazy( + () => + { + s_logger.LogInfo($"Creating {nameof(DependencyProvider)} using {mdf}"); + return new DependencyProvider( + mdf, + sparkFilesPath, + Directory.GetCurrentDirectory()); + })); + _ = dependecyProvider.Value; + } + } + } +} diff --git a/src/csharp/Microsoft.Spark.Worker/Utils/DependencyProvider.cs b/src/csharp/Microsoft.Spark.Worker/Utils/DependencyProvider.cs new file mode 100644 index 000000000..d15bda3a1 --- /dev/null +++ b/src/csharp/Microsoft.Spark.Worker/Utils/DependencyProvider.cs @@ -0,0 +1,87 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.IO.Compression; +using Microsoft.Spark.Utils; +using DepManager = Microsoft.DotNet.DependencyManager; + +namespace Microsoft.Spark.Worker.Utils +{ + /// + /// sets up and creates a new + /// . + /// + /// The following steps outline the process: + /// - Deserializes a . + /// - Uses to unpack required + /// nugets. + /// - Uses and + /// to construct + /// a . 
+ /// + internal class DependencyProvider : IDisposable + { + private readonly DepManager.DependencyProvider _dependencyProvider; + + internal DependencyProvider(string metadataFilePath, string srcPath, string dstPath) + { + DependencyProviderUtils.Metadata metadata = + DependencyProviderUtils.Metadata.Deserialize(metadataFilePath); + + string unpackPath = Path.Combine(dstPath, ".nuget", "packages"); + Directory.CreateDirectory(unpackPath); + + UnpackPackages(srcPath, unpackPath, metadata.NuGets); + + _dependencyProvider = CreateDependencyProvider(unpackPath, metadata); + } + + public void Dispose() + { + (_dependencyProvider as IDisposable)?.Dispose(); + } + + private DepManager.DependencyProvider CreateDependencyProvider( + string basePath, + DependencyProviderUtils.Metadata metadata) + { + IEnumerable AssemblyProbingPaths() + { + foreach (string dependency in metadata.AssemblyProbingPaths) + { + yield return Path.Combine(basePath, dependency); + } + } + + IEnumerable NativeProbingRoots() + { + foreach (string dependency in metadata.NativeProbingPaths) + { + yield return Path.Combine(basePath, dependency); + } + } + + return new DepManager.DependencyProvider( + AssemblyProbingPaths, + NativeProbingRoots); + } + + private void UnpackPackages( + string src, + string dst, + DependencyProviderUtils.NuGetMetadata[] nugetMetadata) + { + foreach (DependencyProviderUtils.NuGetMetadata metadata in nugetMetadata) + { + var packageDirectory = new DirectoryInfo( + Path.Combine(dst, metadata.PackageName.ToLower(), metadata.PackageVersion)); + if (!packageDirectory.Exists) + { + ZipFile.ExtractToDirectory( + Path.Combine(src, metadata.FileName), + packageDirectory.FullName); + } + } + } + } +} diff --git a/src/csharp/Microsoft.Spark/Interop/Ipc/IJvmBridgeFactory.cs b/src/csharp/Microsoft.Spark/Interop/Ipc/IJvmBridgeFactory.cs new file mode 100644 index 000000000..428565527 --- /dev/null +++ b/src/csharp/Microsoft.Spark/Interop/Ipc/IJvmBridgeFactory.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.Spark.Interop.Ipc +{ + internal interface IJvmBridgeFactory + { + IJvmBridge Create(int portNumber); + } +} diff --git a/src/csharp/Microsoft.Spark/Interop/Ipc/JvmBridgeFactory.cs b/src/csharp/Microsoft.Spark/Interop/Ipc/JvmBridgeFactory.cs new file mode 100644 index 000000000..9c9f4ca43 --- /dev/null +++ b/src/csharp/Microsoft.Spark/Interop/Ipc/JvmBridgeFactory.cs @@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.Spark.Interop.Ipc +{ + internal class JvmBridgeFactory : IJvmBridgeFactory + { + public IJvmBridge Create(int portNumber) + { + return new JvmBridge(portNumber); + } + } +} diff --git a/src/csharp/Microsoft.Spark/Interop/SparkEnvironment.cs b/src/csharp/Microsoft.Spark/Interop/SparkEnvironment.cs index 2d19fd185..f2523d065 100644 --- a/src/csharp/Microsoft.Spark/Interop/SparkEnvironment.cs +++ b/src/csharp/Microsoft.Spark/Interop/SparkEnvironment.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. 
using System; -using System.Dynamic; using Microsoft.Spark.Interop.Ipc; using Microsoft.Spark.Services; @@ -46,17 +45,26 @@ internal static Version SparkVersion } } + private static IJvmBridgeFactory s_jvmBridgeFactory; + internal static IJvmBridgeFactory JvmBridgeFactory + { + get + { + return s_jvmBridgeFactory ??= new JvmBridgeFactory(); + } + set + { + s_jvmBridgeFactory = value; + } + } + private static IJvmBridge s_jvmBridge; internal static IJvmBridge JvmBridge { get { - if (s_jvmBridge == null) - { - s_jvmBridge = new JvmBridge(ConfigurationService.GetBackendPortNumber()); - } - - return s_jvmBridge; + return s_jvmBridge ??= + JvmBridgeFactory.Create(ConfigurationService.GetBackendPortNumber()); } set { @@ -69,8 +77,7 @@ internal static IConfigurationService ConfigurationService { get { - return s_configurationService ?? - (s_configurationService = new ConfigurationService()); + return s_configurationService ??= new ConfigurationService(); } set { diff --git a/src/csharp/Microsoft.Spark/ML/Feature/Bucketizer.cs b/src/csharp/Microsoft.Spark/ML/Feature/Bucketizer.cs index 823f13c1a..924c8b362 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/Bucketizer.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/Bucketizer.cs @@ -151,7 +151,7 @@ public Bucketizer SetInputCols(IEnumerable value) /// Gets the name of the column the output data will be written to. This is set by /// SetInputCol /// - // string, the output column + /// string, the output column public string GetOutputCol() { return (string)_jvmObject.Invoke("getOutputCol"); diff --git a/src/csharp/Microsoft.Spark/SparkFiles.cs b/src/csharp/Microsoft.Spark/SparkFiles.cs index 8b09933a7..8c6f6af4b 100644 --- a/src/csharp/Microsoft.Spark/SparkFiles.cs +++ b/src/csharp/Microsoft.Spark/SparkFiles.cs @@ -2,33 +2,62 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; +using System.IO; using Microsoft.Spark.Interop; using Microsoft.Spark.Interop.Ipc; namespace Microsoft.Spark { /// - /// Resolves paths to files added through `SparkContext.addFile()`. + /// Resolves paths to files added through . /// public static class SparkFiles { private static IJvmBridge Jvm { get; } = SparkEnvironment.JvmBridge; private static readonly string s_sparkFilesClassName = "org.apache.spark.SparkFiles"; + [ThreadStatic] + private static string s_rootDirectory; + + [ThreadStatic] + private static bool s_isRunningOnWorker; + /// - /// Get the absolute path of a file added through `SparkContext.addFile()`. + /// Get the absolute path of a file added through + /// . /// - /// The name of the file added through `SparkContext.addFile()` + /// The name of the file added through + /// . /// /// The absolute path of the file. public static string Get(string fileName) => - (string)Jvm.CallStaticJavaMethod(s_sparkFilesClassName, "get", fileName); + Path.GetFullPath(Path.Combine(GetRootDirectory(), fileName)); /// - /// Get the root directory that contains files added through `SparkContext.addFile()`. + /// Get the root directory that contains files added through + /// . /// /// The root directory that contains the files. public static string GetRootDirectory() => + s_isRunningOnWorker ? + s_rootDirectory : (string)Jvm.CallStaticJavaMethod(s_sparkFilesClassName, "getRootDirectory"); + + /// + /// Set the root directory that contains files added through + /// . + /// + /// This should only be called from the Microsoft.Spark.Worker. 
+ /// + /// + /// Root directory that contains files added + /// through . + /// + internal static void SetRootDirectory(string path) + { + s_isRunningOnWorker = true; + s_rootDirectory = path; + } } } diff --git a/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs b/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs index 621a81881..94a37dbb5 100644 --- a/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs +++ b/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs @@ -8,6 +8,7 @@ using System.Reflection; using System.Runtime.InteropServices; using System.Text.RegularExpressions; +using Microsoft.Spark.Services; namespace Microsoft.Spark.Utils { @@ -20,8 +21,10 @@ internal static class AssemblySearchPathResolver /// precedence: /// 1) Comma-separated paths specified in DOTNET_ASSEMBLY_SEARCH_PATHS environment /// variable. Note that if a path starts with ".", the working directory will be prepended. - /// 2) The working directory. - /// 3) The directory of the application. + /// 2) The path of the files added through + /// . + /// 3) The working directory. + /// 4) The directory of the application. /// /// /// The reason that the working directory has higher precedence than the directory @@ -54,6 +57,12 @@ internal static string[] GetAssemblySearchPaths() } } + string sparkFilesPath = SparkFiles.GetRootDirectory(); + if (!string.IsNullOrWhiteSpace(sparkFilesPath)) + { + searchPaths.Add(sparkFilesPath); + } + searchPaths.Add(Directory.GetCurrentDirectory()); searchPaths.Add(AppDomain.CurrentDomain.BaseDirectory); @@ -65,13 +74,15 @@ internal static class AssemblyLoader { internal static Func LoadFromFile { get; set; } = Assembly.LoadFrom; - internal static Func LoadFromName { get; set; } = Assembly.Load; + private static readonly ILoggerService s_logger = + LoggerServiceFactory.GetLogger(typeof(AssemblyLoader)); private static readonly Dictionary s_assemblyCache = new Dictionary(); - private static readonly string[] s_searchPaths = - AssemblySearchPathResolver.GetAssemblySearchPaths(); + // Lazily evaluate the assembly search paths because it has a dependency on SparkFiles. + private static readonly Lazy s_searchPaths = + new Lazy(() => AssemblySearchPathResolver.GetAssemblySearchPaths()); private static readonly string[] s_extensions = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? @@ -95,9 +106,7 @@ internal static class AssemblyLoader /// /// The full name of the assembly /// Name of the file that contains the assembly - /// Cached or Loaded Assembly - /// Thrown if the assembly is not - /// found. + /// Cached or Loaded Assembly or null if not found internal static Assembly LoadAssembly(string assemblyName, string assemblyFileName) { // assemblyFileName is empty when serializing a UDF from within the REPL. @@ -119,7 +128,14 @@ internal static Assembly LoadAssembly(string assemblyName, string assemblyFileNa return assembly; } - throw new FileNotFoundException($"Assembly '{assemblyName}' file not found '{assemblyFileName}' in '{string.Join(",", s_searchPaths)}'"); + s_logger.LogWarn( + string.Format( + "Assembly '{0}' file not found '{1}' in '{2}'", + assemblyName, + assemblyFileName, + string.Join(",", s_searchPaths.Value))); + + return null; } } @@ -129,9 +145,7 @@ internal static Assembly LoadAssembly(string assemblyName, string assemblyFileNa /// s_extension combination. /// /// The fullname of the assembly to load - /// The loaded assembly - /// Thrown if the assembly is not - /// found. 
+ /// The loaded assembly or null if not found internal static Assembly ResolveAssembly(string assemblyName) { lock (s_cacheLock) @@ -153,7 +167,15 @@ internal static Assembly ResolveAssembly(string assemblyName) } } - throw new FileNotFoundException($"Assembly '{assemblyName}' file not found '{simpleAsmName}[{string.Join(",", s_extensions)}]' in '{string.Join(",", s_searchPaths)}'"); + s_logger.LogWarn( + string.Format( + "Assembly '{0}' file not found '{1}[{2}]' in '{3}'", + assemblyName, + simpleAsmName, + string.Join(",", s_extensions), + string.Join(",", s_searchPaths.Value))); + + return null; } } @@ -165,7 +187,7 @@ internal static Assembly ResolveAssembly(string assemblyName) /// True if assembly is loaded, false otherwise. private static bool TryLoadAssembly(string assemblyFileName, ref Assembly assembly) { - foreach (string searchPath in s_searchPaths) + foreach (string searchPath in s_searchPaths.Value) { string assemblyPath = Path.Combine(searchPath, assemblyFileName); if (File.Exists(assemblyPath)) diff --git a/src/csharp/Microsoft.Spark/Utils/CollectionUtils.cs b/src/csharp/Microsoft.Spark/Utils/CollectionUtils.cs new file mode 100644 index 000000000..774e20835 --- /dev/null +++ b/src/csharp/Microsoft.Spark/Utils/CollectionUtils.cs @@ -0,0 +1,18 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Linq; + +namespace Microsoft.Spark.Utils +{ + internal static class CollectionUtils + { + internal static bool ArrayEquals(T[] array1, T[] array2) + { + return (array1?.Length == array2?.Length) && + ((array1 == null) || array1.SequenceEqual(array2)); + } + } +} diff --git a/src/csharp/Microsoft.Spark/Utils/DependencyProviderUtils.cs b/src/csharp/Microsoft.Spark/Utils/DependencyProviderUtils.cs new file mode 100644 index 000000000..f379cfe24 --- /dev/null +++ b/src/csharp/Microsoft.Spark/Utils/DependencyProviderUtils.cs @@ -0,0 +1,99 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.IO; +using System.Runtime.Serialization.Formatters.Binary; + +namespace Microsoft.Spark.Utils +{ + internal class DependencyProviderUtils + { + private static readonly string s_filePattern = "dependencyProviderMetadata_*"; + + internal static string[] GetMetadataFiles(string path) => + Directory.GetFiles(path, s_filePattern); + + // Create the dependency provider metadata filename based on the number passed into the + // function. + // + // number => filename + // 0 => dependencyProviderMetadata_00000000000000000000 + // 1 => dependencyProviderMetadata_00000000000000000001 + // ... 
+ // 20 => dependencyProviderMetadata_00000000000000000020 + internal static string CreateFileName(ulong number) => + s_filePattern.Replace("*", $"{number:D20}"); + + [Serializable] + internal class NuGetMetadata + { + public string FileName { get; set; } + public string PackageName { get; set; } + public string PackageVersion { get; set; } + + public override int GetHashCode() + { + return base.GetHashCode(); + } + + public override bool Equals(object obj) + { + return (obj is NuGetMetadata nugetMetadata) && + Equals(nugetMetadata); + } + + private bool Equals(NuGetMetadata other) + { + return (other != null) && + (FileName == other.FileName) && + (PackageName == other.PackageName) && + (PackageVersion == other.PackageVersion); + } + } + + [Serializable] + internal class Metadata + { + public string[] AssemblyProbingPaths { get; set; } + public string[] NativeProbingPaths { get; set; } + public NuGetMetadata[] NuGets { get; set; } + + public override int GetHashCode() + { + return base.GetHashCode(); + } + + public override bool Equals(object obj) + { + return (obj is Metadata metadata) && + Equals(metadata); + } + + internal static Metadata Deserialize(string path) + { + using FileStream fileStream = File.OpenRead(path); + var formatter = new BinaryFormatter(); + return (Metadata)formatter.Deserialize(fileStream); + } + + internal void Serialize(string path) + { + using FileStream fileStream = File.OpenWrite(path); + var formatter = new BinaryFormatter(); + formatter.Serialize(fileStream, this); + } + + private bool Equals(Metadata other) + { + return (other != null) && + CollectionUtils.ArrayEquals( + AssemblyProbingPaths, + other.AssemblyProbingPaths) && + CollectionUtils.ArrayEquals(NativeProbingPaths, other.NativeProbingPaths) && + CollectionUtils.ArrayEquals(NuGets, other.NuGets); + } + } + } +} diff --git a/src/csharp/Microsoft.Spark/Utils/UdfSerDe.cs b/src/csharp/Microsoft.Spark/Utils/UdfSerDe.cs index 638838b9f..d338ddbdb 100644 --- a/src/csharp/Microsoft.Spark/Utils/UdfSerDe.cs +++ b/src/csharp/Microsoft.Spark/Utils/UdfSerDe.cs @@ -257,8 +257,21 @@ private static TypeData SerializeType(Type type) private static Type DeserializeType(TypeData typeData) => s_typeCache.GetOrAdd( typeData, - td => AssemblyLoader.LoadAssembly( - td.AssemblyName, - td.AssemblyFileName).GetType(td.Name)); + td => + { + Type type = AssemblyLoader.LoadAssembly( + td.AssemblyName, + td.AssemblyFileName).GetType(td.Name); + if (type == null) + { + throw new FileNotFoundException( + string.Format( + "Assembly '{0}' file not found '{1}'", + td.AssemblyName, + td.AssemblyFileName)); + } + + return type; + }); } } diff --git a/src/csharp/Microsoft.Spark/Utils/UdfUtils.cs b/src/csharp/Microsoft.Spark/Utils/UdfUtils.cs index b012794ba..a4c09ae3b 100644 --- a/src/csharp/Microsoft.Spark/Utils/UdfUtils.cs +++ b/src/csharp/Microsoft.Spark/Utils/UdfUtils.cs @@ -5,7 +5,6 @@ using System; using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using Apache.Arrow; using Microsoft.Data.Analysis; using Microsoft.Spark.Interop; @@ -183,22 +182,24 @@ internal static JvmObjectReference CreatePythonFunction(IJvmBridge jvm, byte[] c private static IJvmObjectReferenceProvider CreateEnvVarsForPythonFunction(IJvmBridge jvm) { var environmentVars = new Hashtable(jvm); - string assemblySearchPath = string.Join(",", - new[] - { - Environment.GetEnvironmentVariable( - AssemblySearchPathResolver.AssemblySearchPathsEnvVarName), - SparkFiles.GetRootDirectory() - }.Where(s => !string.IsNullOrWhiteSpace(s))); - + 
string assemblySearchPath = Environment.GetEnvironmentVariable( + AssemblySearchPathResolver.AssemblySearchPathsEnvVarName); if (!string.IsNullOrEmpty(assemblySearchPath)) { environmentVars.Put( AssemblySearchPathResolver.AssemblySearchPathsEnvVarName, assemblySearchPath); } - // DOTNET_WORKER_SPARK_VERSION is used to handle different versions of Spark on the worker. - environmentVars.Put("DOTNET_WORKER_SPARK_VERSION", SparkEnvironment.SparkVersion.ToString()); + // DOTNET_WORKER_SPARK_VERSION is used to handle different versions + // of Spark on the worker. + environmentVars.Put( + "DOTNET_WORKER_SPARK_VERSION", + SparkEnvironment.SparkVersion.ToString()); + + if (EnvironmentUtils.GetEnvironmentVariableAsBool("DOTNET_SPARK_RUNNING_REPL")) + { + environmentVars.Put("DOTNET_SPARK_RUNNING_REPL", "true"); + } return environmentVars; } From 4be3dce24a4020d6dbbc738fed05d60be4e4407b Mon Sep 17 00:00:00 2001 From: Steve Suh Date: Fri, 12 Jun 2020 21:14:26 -0700 Subject: [PATCH 04/27] dotnet-interactive assembly extension (#517) --- ...ensions.DotNet.Interactive.UnitTest.csproj | 25 +++ .../PackageResolverTests.cs | 95 ++++++++++ .../AssemblyKernelExtension.cs | 156 +++++++++++++++++ ...Spark.Extensions.DotNet.Interactive.csproj | 40 +++++ .../PackageResolver.cs | 165 ++++++++++++++++++ .../PackageRestoreContextWrapper.cs | 14 ++ .../ResolvedNugetPackage.cs | 15 ++ .../DependencyProviderUtilsTests.cs | 8 +- .../Utils/AssemblyLoaderHelper.cs | 4 +- src/csharp/Microsoft.Spark.sln | 14 ++ src/csharp/Microsoft.Spark/Constants.cs | 11 ++ .../Microsoft.Spark/Microsoft.Spark.csproj | 2 + .../Microsoft.Spark/Utils/AssemblyLoader.cs | 2 +- .../Utils/DependencyProviderUtils.cs | 10 +- src/csharp/Microsoft.Spark/Utils/UdfUtils.cs | 4 +- 15 files changed, 551 insertions(+), 14 deletions(-) create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageRestoreContextWrapper.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/ResolvedNugetPackage.cs create mode 100644 src/csharp/Microsoft.Spark/Constants.cs diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj new file mode 100644 index 000000000..b0af57cf2 --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj @@ -0,0 +1,25 @@ + + + + netcoreapp3.1 + Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest + false + + https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet3.1/nuget/v3/index.json + + + + + + + + + + + + + + + + + diff --git 
a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs new file mode 100644 index 000000000..8fac95d7a --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs @@ -0,0 +1,95 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using System.IO; +using System.Linq; +using Microsoft.DotNet.Interactive.Utility; +using Microsoft.Spark.UnitTest.TestUtils; +using Microsoft.Spark.Utils; +using Moq; +using Xunit; + +namespace Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest +{ + public class PackageResolverTests + { + [Fact] + public void TestPackageResolver() + { + using var tempDir = new TemporaryDirectory(); + + string packageName = "package.name"; + string packageVersion = "0.1.0"; + string packageRootPath = + Path.Combine(tempDir.Path, "path", "to", "packages", packageName, packageVersion); + string packageFrameworkPath = Path.Combine(packageRootPath, "lib", "framework"); + + Directory.CreateDirectory(packageRootPath); + var nugetFile = new FileInfo( + Path.Combine(packageRootPath, $"{packageName}.{packageVersion}.nupkg")); + using (File.Create(nugetFile.FullName)) + { + } + + var assemblyPaths = new List + { + new FileInfo(Path.Combine(packageFrameworkPath, "1.dll")), + new FileInfo(Path.Combine(packageFrameworkPath, "2.dll")) + }; + var probingPaths = new List { new DirectoryInfo(packageRootPath) }; + + var mockPackageRestoreContextWrapper = new Mock(); + mockPackageRestoreContextWrapper + .SetupGet(m => m.ResolvedPackageReferences) + .Returns(new ResolvedPackageReference[] + { + new ResolvedPackageReference( + packageName, + packageVersion, + assemblyPaths, + new DirectoryInfo(packageRootPath), + probingPaths) + }); + + var packageResolver = new PackageResolver(mockPackageRestoreContextWrapper.Object); + IEnumerable actualFiles = packageResolver.GetFiles(tempDir.Path); + + string metadataFilePath = + Path.Combine(tempDir.Path, DependencyProviderUtils.CreateFileName(1)); + var expectedFiles = new string[] + { + nugetFile.FullName, + metadataFilePath + }; + Assert.True(expectedFiles.SequenceEqual(actualFiles)); + Assert.True(File.Exists(metadataFilePath)); + + DependencyProviderUtils.Metadata actualMetadata = + DependencyProviderUtils.Metadata.Deserialize(metadataFilePath); + var expectedMetadata = new DependencyProviderUtils.Metadata + { + AssemblyProbingPaths = new string[] + { + Path.Combine(packageName, packageVersion, "lib", "framework", "1.dll"), + Path.Combine(packageName, packageVersion, "lib", "framework", "2.dll") + }, + NativeProbingPaths = new string[] + { + Path.Combine(packageName, packageVersion) + }, + NuGets = new DependencyProviderUtils.NuGetMetadata[] + { + new DependencyProviderUtils.NuGetMetadata + { + FileName = $"{packageName}.{packageVersion}.nupkg", + PackageName = packageName, + PackageVersion = packageVersion + } + } + }; + Assert.True(expectedMetadata.Equals(actualMetadata)); + } + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs new file mode 100644 index 000000000..80977c46e --- /dev/null +++ 
b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs @@ -0,0 +1,156 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading.Tasks; +using Microsoft.CodeAnalysis; +using Microsoft.DotNet.Interactive; +using Microsoft.DotNet.Interactive.Commands; +using Microsoft.DotNet.Interactive.CSharp; +using Microsoft.DotNet.Interactive.Utility; +using Microsoft.Spark.Interop; +using Microsoft.Spark.Sql; +using Microsoft.Spark.Utils; + +namespace Microsoft.Spark.Extensions.DotNet.Interactive +{ + /// + /// A kernel extension when using .NET for Apache Spark with Microsoft.DotNet.Interactive + /// Adds nuget and assembly dependencies to the default + /// using . + /// + public class AssemblyKernelExtension : IKernelExtension + { + private const string TempDirEnvVar = "DOTNET_SPARK_EXTENSION_INTERACTIVE_TMPDIR"; + + private readonly PackageResolver _packageResolver = + new PackageResolver(new PackageRestoreContextWrapper()); + + /// + /// Called by the Microsoft.DotNet.Interactive Assembly Extension Loader. + /// + /// The kernel calling this method. + /// when extension is loaded. + public Task OnLoadAsync(IKernel kernel) + { + if (kernel is CompositeKernel kernelBase) + { + Environment.SetEnvironmentVariable(Constants.RunningREPLEnvVar, "true"); + + DirectoryInfo tempDir = CreateTempDirectory(); + kernelBase.RegisterForDisposal(new DisposableDirectory(tempDir)); + + kernelBase.AddMiddleware(async (command, context, next) => + { + if ((context.HandlingKernel is CSharpKernel kernel) && + (command is SubmitCode) && + TryGetSparkSession(out SparkSession sparkSession) && + TryEmitAssembly(kernel, tempDir.FullName, out string assemblyPath)) + { + sparkSession.SparkContext.AddFile(assemblyPath); + + foreach (string filePath in GetPackageFiles(tempDir.FullName)) + { + sparkSession.SparkContext.AddFile(filePath); + } + } + + await next(command, context); + }); + } + + return Task.CompletedTask; + } + + private DirectoryInfo CreateTempDirectory() + { + string envTempDir = Environment.GetEnvironmentVariable(TempDirEnvVar); + string tempDirBasePath = string.IsNullOrEmpty(envTempDir) ? + Directory.GetCurrentDirectory() : + envTempDir; + + if (!IsPathValid(tempDirBasePath)) + { + throw new Exception($"[{GetType().Name}] Spaces in " + + $"'{tempDirBasePath}' is unsupported. Set the {TempDirEnvVar} " + + "environment variable to control the base path. 
Please see " + + "https://issues.apache.org/jira/browse/SPARK-30126 and " + + "https://github.com/apache/spark/pull/26773 for more details."); + } + + return Directory.CreateDirectory( + Path.Combine(tempDirBasePath, Path.GetRandomFileName())); + } + + private bool TryEmitAssembly(CSharpKernel kernel, string dstPath, out string assemblyPath) + { + Compilation compilation = kernel.ScriptState.Script.GetCompilation(); + string assemblyName = + AssemblyLoader.NormalizeAssemblyName(compilation.AssemblyName); + assemblyPath = Path.Combine(dstPath, $"{assemblyName}.dll"); + if (!File.Exists(assemblyPath)) + { + FileSystemExtensions.Emit(compilation, assemblyPath); + return true; + } + + throw new Exception( + $"TryEmitAssembly() unexpected duplicate assembly: ${assemblyPath}"); + } + + private bool TryGetSparkSession(out SparkSession sparkSession) + { + sparkSession = SparkSession.GetDefaultSession(); + return sparkSession != null; + } + + private IEnumerable GetPackageFiles(string path) + { + foreach (string filePath in _packageResolver.GetFiles(path)) + { + if (IsPathValid(filePath)) + { + yield return filePath; + } + else + { + // Copy file to a path without spaces. + string fileDestPath = Path.Combine( + path, + Path.GetFileName(filePath).Replace(" ", string.Empty)); + File.Copy(filePath, fileDestPath); + yield return fileDestPath; + } + } + } + + /// + /// In some versions of Spark, spaces is unsupported when using + /// . + /// + /// For more details please see: + /// - https://issues.apache.org/jira/browse/SPARK-30126 + /// - https://github.com/apache/spark/pull/26773 + /// + /// The path to validate. + /// true if the path is supported by Spark, false otherwise. + private bool IsPathValid(string path) + { + if (!path.Contains(" ")) + { + return true; + } + + Version version = SparkEnvironment.SparkVersion; + return (version.Major, version.Minor, version.Build) switch + { + (2, _, _) => false, + (3, 0, _) => true, + _ => throw new NotSupportedException($"Spark {version} not supported.") + }; + } + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj new file mode 100644 index 000000000..6966e3390 --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj @@ -0,0 +1,40 @@ + + + + Library + netcoreapp3.1 + Microsoft.Spark.Extensions.DotNet.Interactive + true + true + + NU5100;$(NoWarn) + + DotNet Interactive Extension for .NET for Apache Spark + https://github.com/dotnet/spark/tree/master/docs/release-notes + spark;dotnet;csharp;interactive;dotnet-interactive + + https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet3.1/nuget/v3/index.json + + + + + + + + + + all + + + + + + + + + + + + diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs new file mode 100644 index 000000000..4e91156ba --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs @@ -0,0 +1,165 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
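The net effect of the middleware above is that code defined interactively works against Spark without a separate build step: each submission's compilation is emitted to the temp directory and shipped with `SparkContext.AddFile`, together with any NuGet files the session resolved, so executors can load the UDF assembly. A hedged sketch of what a notebook cell might then look like (the cell contents below are illustrative, not part of this patch):

```csharp
// In a .NET Interactive C# cell, after referencing Microsoft.Spark and this extension:
using System;
using Microsoft.Spark.Sql;
using static Microsoft.Spark.Sql.Functions;

SparkSession spark = SparkSession.Builder().GetOrCreate();
DataFrame df = spark.Range(0, 3);

// The lambda below is compiled into this cell's assembly; the kernel extension
// calls SparkContext.AddFile on that assembly so workers can deserialize the UDF.
Func<Column, Column> formatId = Udf<long, string>(id => $"id = {id}");
df.Select(formatId(df["id"])).Show();
```

The `IsPathValid` guard above matters here because `SparkContext.AddFile` does not accept paths containing spaces on Spark 2.x (SPARK-30126), which is why the extension rejects a space-containing temp directory and copies package files to space-free names before adding them.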
+ +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.IO; +using System.Threading; +using Microsoft.DotNet.Interactive.Utility; +using Microsoft.Spark.Utils; + +namespace Microsoft.Spark.Extensions.DotNet.Interactive +{ + internal class PackageResolver + { + private readonly PackageRestoreContextWrapper _packageRestoreContextWrapper; + private readonly ConcurrentDictionary _filesCopied; + private long _metadataCounter; + + internal PackageResolver(PackageRestoreContextWrapper packageRestoreContextWrapper) + { + _packageRestoreContextWrapper = packageRestoreContextWrapper; + _filesCopied = new ConcurrentDictionary(); + _metadataCounter = 0; + } + + /// + /// Generates and serializes a to + /// . Returns a list of file paths which include the + /// the serialized and nuget file + /// dependencies. + /// + /// Path to write metadata. + /// + /// List of file paths of the serialized + /// and nuget file dependencies. + /// + internal IEnumerable GetFiles(string writePath) + { + IEnumerable nugetPackagesToCopy = GetNewPackages(); + + var assemblyProbingPaths = new List(); + var nativeProbingPaths = new List(); + var nugetMetadata = new List(); + + foreach (ResolvedNuGetPackage package in nugetPackagesToCopy) + { + ResolvedPackageReference resolvedPackage = package.ResolvedPackage; + + foreach (FileInfo asmPath in resolvedPackage.AssemblyPaths) + { + // asmPath.FullName + // /path/to/packages/package.name/package.version/lib/framework/1.dll + // resolvedPackage.PackageRoot + // /path/to/packages/package.name/package.version/ + // GetRelativeToPackages(..) + // package.name/package.version/lib/framework/1.dll + assemblyProbingPaths.Add( + GetPathRelativeToPackages( + asmPath.FullName, + resolvedPackage.PackageRoot)); + } + + foreach (DirectoryInfo probePath in resolvedPackage.ProbingPaths) + { + // probePath.FullName + // /path/to/packages/package.name/package.version/ + // resolvedPackage.PackageRoot + // /path/to/packages/package.name/package.version/ + // GetRelativeToPackages(..) + // package.name/package.version + nativeProbingPaths.Add( + GetPathRelativeToPackages( + probePath.FullName, + resolvedPackage.PackageRoot)); + } + + nugetMetadata.Add( + new DependencyProviderUtils.NuGetMetadata + { + FileName = package.NuGetFile.Name, + PackageName = resolvedPackage.PackageName, + PackageVersion = resolvedPackage.PackageVersion + }); + + yield return package.NuGetFile.FullName; + } + + if (nugetMetadata.Count > 0) + { + var metadataPath = + Path.Combine( + writePath, + DependencyProviderUtils.CreateFileName( + Interlocked.Increment(ref _metadataCounter))); + new DependencyProviderUtils.Metadata + { + AssemblyProbingPaths = assemblyProbingPaths.ToArray(), + NativeProbingPaths = nativeProbingPaths.ToArray(), + NuGets = nugetMetadata.ToArray() + }.Serialize(metadataPath); + + yield return metadataPath; + } + } + + /// + /// Return the delta of the list of packages that have been introduced + /// since the last call. + /// + /// The delta of the list of packages. 
+ private IEnumerable GetNewPackages() + { + IEnumerable packages = + _packageRestoreContextWrapper.ResolvedPackageReferences; + foreach (ResolvedPackageReference package in packages) + { + IEnumerable files = + package.PackageRoot.EnumerateFiles("*.nupkg", SearchOption.AllDirectories); + + foreach (FileInfo file in files) + { + if (_filesCopied.TryAdd(file.Name, 1)) + { + yield return new ResolvedNuGetPackage + { + ResolvedPackage = package, + NuGetFile = file + }; + } + } + } + } + + /// + /// Given a , get the relative path to the packages directory. + /// The package is a subfolder within the packages directory. + /// + /// Examples: + /// path: + /// /path/to/packages/package.name/package.version/lib/framework/1.dll + /// directory: + /// /path/to/packages/package.name/package.version/ + /// relative path: + /// package.name/package.version/lib/framework/1.dll + /// + /// path: + /// /path/to/packages/package.name/package.version/ + /// directory: + /// /path/to/packages/package.name/package.version/ + /// relative path: + /// package.name/package.version + /// + /// The full path used to determine the relative path. + /// The package directory. + /// The relative path to the packages directory. + private string GetPathRelativeToPackages(string path, DirectoryInfo directory) + { + string strippedRoot = path + .Substring(directory.FullName.Length) + .Trim(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar); + return Path.Combine(directory.Parent.Name, directory.Name, strippedRoot); + } + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageRestoreContextWrapper.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageRestoreContextWrapper.cs new file mode 100644 index 000000000..259088d7a --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageRestoreContextWrapper.cs @@ -0,0 +1,14 @@ +using System.Collections.Generic; +using Microsoft.DotNet.Interactive; +using Microsoft.DotNet.Interactive.Utility; + +namespace Microsoft.Spark.Extensions.DotNet.Interactive +{ + internal class PackageRestoreContextWrapper + { + internal virtual IEnumerable ResolvedPackageReferences => + ((ISupportNuget)KernelInvocationContext.Current.HandlingKernel) + .PackageRestoreContext + .ResolvedPackageReferences; + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/ResolvedNugetPackage.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/ResolvedNugetPackage.cs new file mode 100644 index 000000000..57106c16a --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/ResolvedNugetPackage.cs @@ -0,0 +1,15 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
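The relative paths computed by `GetPathRelativeToPackages` are the same strings that `DependencyProvider` on the worker later re-roots under its local unpack directory, so the two sides agree on layout without sharing absolute paths. A small sketch of that round trip, using placeholder paths and hypothetical variable names:

```csharp
using System;
using System.IO;

// Driver / notebook side (PackageResolver): probing path relative to the packages root.
string relativeAssemblyPath =
    Path.Combine("package.name", "0.1.0", "lib", "framework", "1.dll");

// Worker side (DependencyProvider): the shipped .nupkg files are unpacked under
// "<dstPath>/.nuget/packages" and the relative path is re-rooted there.
string dstPath = Path.Combine(Path.GetTempPath(), "dotnet-spark-worker"); // placeholder
string unpackPath = Path.Combine(dstPath, ".nuget", "packages");
string resolvedAssemblyPath = Path.Combine(unpackPath, relativeAssemblyPath);

Console.WriteLine(resolvedAssemblyPath);
```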
+ +using System.IO; +using Microsoft.DotNet.Interactive.Utility; + +namespace Microsoft.Spark.Extensions.DotNet.Interactive +{ + internal class ResolvedNuGetPackage + { + public ResolvedPackageReference ResolvedPackage { get; set; } + public FileInfo NuGetFile { get; set; } + } +} diff --git a/src/csharp/Microsoft.Spark.UnitTest/DependencyProviderUtilsTests.cs b/src/csharp/Microsoft.Spark.UnitTest/DependencyProviderUtilsTests.cs index ccfc4890b..ad01e3724 100644 --- a/src/csharp/Microsoft.Spark.UnitTest/DependencyProviderUtilsTests.cs +++ b/src/csharp/Microsoft.Spark.UnitTest/DependencyProviderUtilsTests.cs @@ -131,7 +131,7 @@ public void TestMetadataSerDe() public void TestFileNames() { using var tempDir = new TemporaryDirectory(); - foreach (ulong num in Enumerable.Range(0, 3).Select(x => System.Math.Pow(10, x))) + foreach (long num in Enumerable.Range(0, 3).Select(x => System.Math.Pow(10, x))) { string filePath = Path.Combine(tempDir.Path, DependencyProviderUtils.CreateFileName(num)); @@ -140,9 +140,9 @@ public void TestFileNames() var expectedFiles = new string[] { - "dependencyProviderMetadata_00000000000000000001", - "dependencyProviderMetadata_00000000000000000010", - "dependencyProviderMetadata_00000000000000000100", + "dependencyProviderMetadata_0000000000000000001", + "dependencyProviderMetadata_0000000000000000010", + "dependencyProviderMetadata_0000000000000000100", }; IOrderedEnumerable actualFiles = DependencyProviderUtils .GetMetadataFiles(tempDir.Path) diff --git a/src/csharp/Microsoft.Spark.Worker/Utils/AssemblyLoaderHelper.cs b/src/csharp/Microsoft.Spark.Worker/Utils/AssemblyLoaderHelper.cs index b74228073..1443165bc 100644 --- a/src/csharp/Microsoft.Spark.Worker/Utils/AssemblyLoaderHelper.cs +++ b/src/csharp/Microsoft.Spark.Worker/Utils/AssemblyLoaderHelper.cs @@ -24,7 +24,7 @@ private static readonly ConcurrentDictionary> s_dependencyProviders = new ConcurrentDictionary>(); private static readonly bool s_runningREPL = - EnvironmentUtils.GetEnvironmentVariableAsBool("DOTNET_SPARK_RUNNING_REPL"); + EnvironmentUtils.GetEnvironmentVariableAsBool(Constants.RunningREPLEnvVar); /// /// Register the AssemblyLoader.ResolveAssembly handler to handle the @@ -49,7 +49,7 @@ static AssemblyLoaderHelper() /// These files include: /// - "{packagename}.{version}.nupkg" /// The nuget packages - /// - + /// - /// Serialized object. /// /// On the Worker, in order to resolve the nuget dependencies referenced by diff --git a/src/csharp/Microsoft.Spark.sln b/src/csharp/Microsoft.Spark.sln index 49eac3fc7..73047bff3 100644 --- a/src/csharp/Microsoft.Spark.sln +++ b/src/csharp/Microsoft.Spark.sln @@ -35,6 +35,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Spark.Extensions. 
EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Spark.Extensions.Azure.Synapse.Analytics", "Extensions\Microsoft.Spark.Extensions.Azure.Synapse.Analytics\Microsoft.Spark.Extensions.Azure.Synapse.Analytics.csproj", "{47652C7D-B076-4FD9-98AC-959E38BE18E3}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Spark.Extensions.DotNet.Interactive", "Extensions\Microsoft.Spark.Extensions.DotNet.Interactive\Microsoft.Spark.Extensions.DotNet.Interactive.csproj", "{9C32014D-8C0C-40F1-9ABA-C3BF19687E5C}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest", "Extensions\Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest\Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj", "{7BDE09ED-04B3-41B2-A466-3D6F7225291E}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -89,6 +93,14 @@ Global {47652C7D-B076-4FD9-98AC-959E38BE18E3}.Debug|Any CPU.Build.0 = Debug|Any CPU {47652C7D-B076-4FD9-98AC-959E38BE18E3}.Release|Any CPU.ActiveCfg = Release|Any CPU {47652C7D-B076-4FD9-98AC-959E38BE18E3}.Release|Any CPU.Build.0 = Release|Any CPU + {9C32014D-8C0C-40F1-9ABA-C3BF19687E5C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9C32014D-8C0C-40F1-9ABA-C3BF19687E5C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9C32014D-8C0C-40F1-9ABA-C3BF19687E5C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9C32014D-8C0C-40F1-9ABA-C3BF19687E5C}.Release|Any CPU.Build.0 = Release|Any CPU + {7BDE09ED-04B3-41B2-A466-3D6F7225291E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7BDE09ED-04B3-41B2-A466-3D6F7225291E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7BDE09ED-04B3-41B2-A466-3D6F7225291E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7BDE09ED-04B3-41B2-A466-3D6F7225291E}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -99,6 +111,8 @@ Global {2048446B-45AB-4304-B230-50EDF6E8E6A4} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} {206E16CA-ED59-4F5E-8EA1-9BB7BEEACB63} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} {47652C7D-B076-4FD9-98AC-959E38BE18E3} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} + {9C32014D-8C0C-40F1-9ABA-C3BF19687E5C} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} + {7BDE09ED-04B3-41B2-A466-3D6F7225291E} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {FD15FFDB-EA1B-436F-841D-3386DDF94538} diff --git a/src/csharp/Microsoft.Spark/Constants.cs b/src/csharp/Microsoft.Spark/Constants.cs new file mode 100644 index 000000000..c346aadd3 --- /dev/null +++ b/src/csharp/Microsoft.Spark/Constants.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +namespace Microsoft.Spark +{ + internal class Constants + { + internal const string RunningREPLEnvVar = "DOTNET_SPARK_RUNNING_REPL"; + } +} diff --git a/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj b/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj index f3d3f1ffd..050a43493 100644 --- a/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj +++ b/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj @@ -17,6 +17,8 @@ + + diff --git a/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs b/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs index 94a37dbb5..3b9b34f5e 100644 --- a/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs +++ b/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs @@ -217,7 +217,7 @@ ex is FileLoadException || /// /// Assembly name /// Normalized assembly name - private static string NormalizeAssemblyName(string assemblyName) + internal static string NormalizeAssemblyName(string assemblyName) { // Check if the assembly name follows the Roslyn naming convention. // Roslyn assembly name: "\u211B*4b31b71b-d4bd-4642-9f63-eef5f5d99197#1-14" diff --git a/src/csharp/Microsoft.Spark/Utils/DependencyProviderUtils.cs b/src/csharp/Microsoft.Spark/Utils/DependencyProviderUtils.cs index f379cfe24..3954151d1 100644 --- a/src/csharp/Microsoft.Spark/Utils/DependencyProviderUtils.cs +++ b/src/csharp/Microsoft.Spark/Utils/DependencyProviderUtils.cs @@ -19,12 +19,12 @@ internal static string[] GetMetadataFiles(string path) => // function. // // number => filename - // 0 => dependencyProviderMetadata_00000000000000000000 - // 1 => dependencyProviderMetadata_00000000000000000001 + // 0 => dependencyProviderMetadata_0000000000000000000 + // 1 => dependencyProviderMetadata_0000000000000000001 // ... - // 20 => dependencyProviderMetadata_00000000000000000020 - internal static string CreateFileName(ulong number) => - s_filePattern.Replace("*", $"{number:D20}"); + // 20 => dependencyProviderMetadata_0000000000000000020 + internal static string CreateFileName(long number) => + s_filePattern.Replace("*", $"{number:D19}"); [Serializable] internal class NuGetMetadata diff --git a/src/csharp/Microsoft.Spark/Utils/UdfUtils.cs b/src/csharp/Microsoft.Spark/Utils/UdfUtils.cs index a4c09ae3b..ccb5e5209 100644 --- a/src/csharp/Microsoft.Spark/Utils/UdfUtils.cs +++ b/src/csharp/Microsoft.Spark/Utils/UdfUtils.cs @@ -196,9 +196,9 @@ private static IJvmObjectReferenceProvider CreateEnvVarsForPythonFunction(IJvmBr "DOTNET_WORKER_SPARK_VERSION", SparkEnvironment.SparkVersion.ToString()); - if (EnvironmentUtils.GetEnvironmentVariableAsBool("DOTNET_SPARK_RUNNING_REPL")) + if (EnvironmentUtils.GetEnvironmentVariableAsBool(Constants.RunningREPLEnvVar)) { - environmentVars.Put("DOTNET_SPARK_RUNNING_REPL", "true"); + environmentVars.Put(Constants.RunningREPLEnvVar, "true"); } return environmentVars; From e43a0a465ada4eec837322d54d03a15c88d9e74f Mon Sep 17 00:00:00 2001 From: elvaliuliuliu <47404285+elvaliuliuliu@users.noreply.github.com> Date: Sat, 13 Jun 2020 15:12:59 -0700 Subject: [PATCH 05/27] Add multiple stages for Azure Pipeline (#524) --- azure-pipelines.yml | 876 +++++++++++++++++++++++--------------------- 1 file changed, 466 insertions(+), 410 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 20215a7b2..14b6e689a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -11,9 +11,9 @@ variables: _SignType: real _TeamName: DotNetSpark MSBUILDSINGLELOADCONTEXT: 1 - # backwardCompatibleRelease/forwardCompatibleRelease is the "oldest" releases that work with the current release - 
backwardCompatibleRelease: '0.9.0' + # forwardCompatibleRelease/backwardCompatibleRelease is the "oldest" releases that work with the current release forwardCompatibleRelease: '0.9.0' + backwardCompatibleRelease: '0.9.0' TestsToFilterOut: "(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestDataFrameGroupedMapUdf)&\ (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestDataFrameVectorUdf)&\ (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.BroadcastTests.TestDestroy)&\ @@ -22,7 +22,8 @@ variables: (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithReturnAsTimestampType)&\ (FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithTimestampType)&\ (FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.SparkSessionTests.TestCreateDataFrameWithTimestamp)" - LatestDotnetWorkerDir: '$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Worker\netcoreapp3.1\win-x64' + ArtifactPath: '$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Binaries' + CurrentDotnetWorkerDir: '$(ArtifactPath)\Microsoft.Spark.Worker\netcoreapp3.1\win-x64' BackwardCompatibleDotnetWorkerDir: $(Build.BinariesDirectory)\Microsoft.Spark.Worker-$(backwardCompatibleRelease) # Azure DevOps variables are transformed into environment variables, with these variables we @@ -38,404 +39,131 @@ resources: name: dotnet/spark ref: refs/tags/v$(forwardCompatibleRelease) -jobs: -- job: Build - displayName: Build and Test Sources - pool: Hosted VS2017 - - variables: - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: - _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER) - HADOOP_HOME: $(Build.BinariesDirectory)\hadoop - - steps: - - checkout: self - path: s\master - - checkout: forwardCompatibleRelease - path: s\$(forwardCompatibleRelease) - - - task: Maven@3 - displayName: 'Maven build src' - inputs: - mavenPomFile: master/src/scala/pom.xml - - - task: Maven@3 - displayName: 'Maven build benchmark' - inputs: - mavenPomFile: master/benchmark/scala/pom.xml - - - task: BatchScript@1 - displayName: Download Spark Distros & Winutils.exe - inputs: - filename: master\script\download-spark-distros.cmd - arguments: $(Build.BinariesDirectory) - - - task: BatchScript@1 - displayName: Download backward compatible worker v$(backwardCompatibleRelease) - inputs: - filename: master\script\download-worker-release.cmd - arguments: '$(Build.BinariesDirectory) $(backwardCompatibleRelease)' - - - script: master\build.cmd -pack - -c $(buildConfiguration) - -ci - $(_OfficialBuildIdArgs) - /p:PublishSparkWorker=true - /p:SparkWorkerPublishDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Worker - displayName: '.NET build' - - - task: DotNetCoreCLI@2 - displayName: '.NET unit tests' - inputs: - command: test - projects: 'master/**/*UnitTest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.0' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.1' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: 
$(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.2' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.3' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.4' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.0' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.1' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.3' - inputs: - command: test - projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.4' - inputs: - command: test - projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.5' - inputs: - command: test - projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.0 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.1 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7 - DOTNET_WORKER_DIR: 
$(BackwardCompatibleDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.2 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.3 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.4 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.0 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.1 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.3 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.4 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.5 with backward compatible worker v$(backwardCompatibleRelease)' - inputs: - command: test - projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) - - - task: Maven@3 - displayName: 'Maven build src for forward compatible 
release v$(forwardCompatibleRelease)' - inputs: - mavenPomFile: $(forwardCompatibleRelease)/src/scala/pom.xml - - - script: $(forwardCompatibleRelease)\build.cmd - -c $(buildConfiguration) - -ci - $(_OfficialBuildIdArgs) - /p:PublishSparkWorker=false - displayName: '.NET build for forward compatible release v$(forwardCompatibleRelease)' - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.0 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.1 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.2 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.3 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.3.4 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.0 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.1 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.3 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark*.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: 
$(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.4 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark*.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) - - - task: DotNetCoreCLI@2 - displayName: 'E2E tests for Spark 2.4.5 from forward compatible release v$(forwardCompatibleRelease)' - inputs: - command: test - projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark*.E2ETest/*.csproj' - arguments: '--configuration $(buildConfiguration)' - env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7 - DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir) +stages: +- stage: Build + displayName: Build Sources + jobs: + - job: Build + pool: Hosted VS2017 - - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: - - task: CopyFiles@2 - displayName: Stage .NET artifacts + variables: + ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: + _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER) + + steps: + - task: Maven@3 + displayName: 'Maven build src' + inputs: + mavenPomFile: src/scala/pom.xml + + - task: Maven@3 + displayName: 'Maven build benchmark' + inputs: + mavenPomFile: benchmark/scala/pom.xml + + - script: build.cmd -pack + -c $(buildConfiguration) + -ci + $(_OfficialBuildIdArgs) + /p:PublishSparkWorker=true + /p:SparkWorkerPublishDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Worker + displayName: '.NET build' + + - task: DotNetCoreCLI@2 + displayName: '.NET unit tests' inputs: - sourceFolder: $(Build.SourcesDirectory)/master/artifacts/packages/$(buildConfiguration)/Shipping - contents: | - **/*.nupkg - **/*.snupkg - targetFolder: $(Build.ArtifactStagingDirectory)/BuildArtifacts/artifacts/packages/$(buildConfiguration)/Shipping + command: test + projects: '**/*UnitTest/*.csproj' + arguments: '--configuration $(buildConfiguration)' - task: CopyFiles@2 - displayName: Stage build logs + displayName: Stage Maven build jars inputs: - sourceFolder: $(Build.SourcesDirectory)/master/artifacts/log - targetFolder: $(Build.ArtifactStagingDirectory)/BuildArtifacts/artifacts/log + sourceFolder: $(Build.SourcesDirectory)/src/scala + contents: '**/*.jar' + targetFolder: $(Build.ArtifactStagingDirectory)/Jars + + - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: + - task: CopyFiles@2 + displayName: Stage .NET artifacts + inputs: + sourceFolder: $(Build.SourcesDirectory)/artifacts/packages/$(buildConfiguration)/Shipping + contents: | + **/*.nupkg + **/*.snupkg + targetFolder: $(Build.ArtifactStagingDirectory)/BuildArtifacts/artifacts/packages/$(buildConfiguration)/Shipping + + - task: CopyFiles@2 + displayName: Stage build logs + inputs: + sourceFolder: $(Build.SourcesDirectory)/artifacts/log + targetFolder: $(Build.ArtifactStagingDirectory)/BuildArtifacts/artifacts/log - task: PublishBuildArtifacts@1 inputs: pathtoPublish: '$(Build.ArtifactStagingDirectory)' artifactName: Microsoft.Spark.Binaries -- ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: - - job: SignPublish - dependsOn: - - Build - 
displayName: Sign and Publish Artifacts - pool: - name: NetCoreInternal-Pool - queue: buildpool.windows.10.amd64.vs2017 + - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: + - job: SignPublish + dependsOn: + - Build + displayName: Sign and Publish Artifacts + pool: + name: NetCoreInternal-Pool + queue: buildpool.windows.10.amd64.vs2017 + + variables: + ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: + _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER) + + steps: + - task: DownloadBuildArtifacts@0 + displayName: Download Build Artifacts + inputs: + artifactName: Microsoft.Spark.Binaries + downloadPath: $(Build.ArtifactStagingDirectory) + + - task: MicroBuildSigningPlugin@2 + displayName: Install MicroBuild plugin + inputs: + signType: $(_SignType) + zipSources: false + feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json + env: + TeamName: $(_TeamName) + condition: and(succeeded(), in(variables['_SignType'], 'real', 'test'), eq(variables['Agent.Os'], 'Windows_NT')) + + - task: PowerShell@2 + displayName: Sign artifacts and Package Microsoft.Spark.Worker + inputs: + filePath: eng\common\build.ps1 + arguments: -restore -sign -publish + -c $(buildConfiguration) + -ci + $(_OfficialBuildIdArgs) + /p:DotNetSignType=$(_SignType) + /p:SparkPackagesDir=$(ArtifactPath)\BuildArtifacts\artifacts\packages + /p:SparkWorkerPublishDir=$(ArtifactPath)\Microsoft.Spark.Worker + /p:SparkWorkerPackageOutputDir=$(ArtifactPath) + + - task: PublishBuildArtifacts@1 + inputs: + pathtoPublish: '$(ArtifactPath)' + artifactName: Microsoft.Spark.Binaries + +- stage: Test + displayName: E2E Tests + dependsOn: Build + jobs: + - job: Run + pool: Hosted VS2017 variables: ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER) + HADOOP_HOME: $(Build.BinariesDirectory)\hadoop + DOTNET_WORKER_DIR: $(CurrentDotnetWorkerDir) steps: - task: DownloadBuildArtifacts@0 @@ -443,31 +171,359 @@ jobs: inputs: artifactName: Microsoft.Spark.Binaries downloadPath: $(Build.ArtifactStagingDirectory) - - - task: MicroBuildSigningPlugin@2 - displayName: Install MicroBuild plugin + + - task: CopyFiles@2 + displayName: Copy jars + inputs: + sourceFolder: $(ArtifactPath)/Jars + contents: '**/*.jar' + targetFolder: $(Build.SourcesDirectory)/src/scala + + - task: BatchScript@1 + displayName: Download Spark Distros & Winutils.exe + inputs: + filename: script\download-spark-distros.cmd + arguments: $(Build.BinariesDirectory) + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.0' inputs: - signType: $(_SignType) - zipSources: false - feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' env: - TeamName: $(_TeamName) - condition: and(succeeded(), in(variables['_SignType'], 'real', 'test'), eq(variables['Agent.Os'], 'Windows_NT')) - - - task: PowerShell@2 - displayName: Sign artifacts and Package Microsoft.Spark.Worker - inputs: - filePath: eng\common\build.ps1 - arguments: -restore -sign -publish - -c $(buildConfiguration) - -ci - $(_OfficialBuildIdArgs) - /p:DotNetSignType=$(_SignType) - 
/p:SparkPackagesDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Binaries\BuildArtifacts\artifacts\packages - /p:SparkWorkerPublishDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Binaries\Microsoft.Spark.Worker - /p:SparkWorkerPackageOutputDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Binaries + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7 - - task: PublishBuildArtifacts@1 + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.1' inputs: - pathtoPublish: '$(Build.ArtifactStagingDirectory)/Microsoft.Spark.Binaries' - artifactName: Microsoft.Spark.Binaries + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.2' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.3' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.4' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.0' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.1' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.3' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.4' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.5' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7 + +- stage: ForwardCompatibility + displayName: E2E Forward Compatibility Tests + dependsOn: Build + jobs: + - job: Run + pool: Hosted VS2017 + + variables: + ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: + _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER) + HADOOP_HOME: $(Build.BinariesDirectory)\hadoop + DOTNET_WORKER_DIR: $(CurrentDotnetWorkerDir) + + steps: + - checkout: forwardCompatibleRelease + path: s\$(forwardCompatibleRelease) + + - task: Maven@3 + displayName: 
'Maven build src for forward compatible release v$(forwardCompatibleRelease)' + inputs: + mavenPomFile: src/scala/pom.xml + + - task: DownloadBuildArtifacts@0 + displayName: Download Build Artifacts + inputs: + artifactName: Microsoft.Spark.Binaries + downloadPath: $(Build.ArtifactStagingDirectory) + + - task: BatchScript@1 + displayName: Download Spark Distros & Winutils.exe + inputs: + filename: script\download-spark-distros.cmd + arguments: $(Build.BinariesDirectory) + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.0' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.1' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.2' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.3' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.4' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.0' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.1' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.3' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.4' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.5' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7 + +- stage: BackwardCompatibility + displayName: E2E Backward Compatibility Tests + dependsOn: Build + jobs: + - job: Run + pool: Hosted VS2017 + + variables: + ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: + _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER) 
+ HADOOP_HOME: $(Build.BinariesDirectory)\hadoop + DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir) + + steps: + - task: DownloadBuildArtifacts@0 + displayName: Download Build Artifacts + inputs: + artifactName: Microsoft.Spark.Binaries + downloadPath: $(Build.ArtifactStagingDirectory) + + - task: CopyFiles@2 + displayName: Copy jars + inputs: + sourceFolder: $(ArtifactPath)/Jars + contents: '**/*.jar' + targetFolder: $(Build.SourcesDirectory)/src/scala + + - task: BatchScript@1 + displayName: Download Spark Distros & Winutils.exe + inputs: + filename: script\download-spark-distros.cmd + arguments: $(Build.BinariesDirectory) + + - task: BatchScript@1 + displayName: Download backward compatible worker v$(backwardCompatibleRelease) + inputs: + filename: script\download-worker-release.cmd + arguments: '$(Build.BinariesDirectory) $(backwardCompatibleRelease)' + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.0' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.1' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.2' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.3' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.3.4' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.0' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.1' + inputs: + command: test + projects: '**/Microsoft.Spark.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.3' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.4' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: 
$(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.5' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7 \ No newline at end of file From c8992e0787c6f50f2aa3572839fe67b872a3363a Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Sun, 14 Jun 2020 14:30:50 -0700 Subject: [PATCH 06/27] Support Spark 2.4.6 (#547) --- azure-pipelines.yml | 21 ++++++++++++++++++- script/download-spark-distros.cmd | 4 +++- .../spark/deploy/dotnet/DotnetRunner.scala | 3 ++- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 14b6e689a..8ba73e0c1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -275,6 +275,15 @@ stages: env: SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7 + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.6' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.6-bin-hadoop2.7 + - stage: ForwardCompatibility displayName: E2E Forward Compatibility Tests dependsOn: Build @@ -526,4 +535,14 @@ stages: projects: '**/Microsoft.Spark*.E2ETest/*.csproj' arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' env: - SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7 \ No newline at end of file + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7 + + - task: DotNetCoreCLI@2 + displayName: 'E2E tests for Spark 2.4.6' + inputs: + command: test + projects: '**/Microsoft.Spark*.E2ETest/*.csproj' + arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)' + env: + SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.6-bin-hadoop2.7 + diff --git a/script/download-spark-distros.cmd b/script/download-spark-distros.cmd index d02bb49a7..0d2435a00 100644 --- a/script/download-spark-distros.cmd +++ b/script/download-spark-distros.cmd @@ -23,5 +23,7 @@ curl -k -L -o spark-2.4.1.tgz https://archive.apache.org/dist/spark/spark-2.4.1/ curl -k -L -o spark-2.4.3.tgz https://archive.apache.org/dist/spark/spark-2.4.3/spark-2.4.3-bin-hadoop2.7.tgz && tar xzvf spark-2.4.3.tgz curl -k -L -o spark-2.4.4.tgz https://archive.apache.org/dist/spark/spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz && tar xzvf spark-2.4.4.tgz curl -k -L -o spark-2.4.5.tgz https://archive.apache.org/dist/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz && tar xzvf spark-2.4.5.tgz +curl -k -L -o spark-2.4.6.tgz https://archive.apache.org/dist/spark/spark-2.4.6/spark-2.4.6-bin-hadoop2.7.tgz && tar xzvf spark-2.4.6.tgz + +endlocal -endlocal \ No newline at end of file diff --git a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/deploy/dotnet/DotnetRunner.scala b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/deploy/dotnet/DotnetRunner.scala index 65a56e3e8..5925dcca9 100644 --- a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/deploy/dotnet/DotnetRunner.scala +++ b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/deploy/dotnet/DotnetRunner.scala @@ -34,7 +34,8 @@ import scala.util.Try */ object DotnetRunner extends Logging { private val DEBUG_PORT = 5567 - private val supportedSparkVersions = Set[String]("2.4.0", "2.4.1", "2.4.3", "2.4.4", "2.4.5") + 
private val supportedSparkVersions = + Set[String]("2.4.0", "2.4.1", "2.4.3", "2.4.4", "2.4.5", "2.4.6") val SPARK_VERSION = DotnetUtils.normalizeSparkVersion(spark.SPARK_VERSION) From 9240bfaf721443a2c5556034ce126f7adbb6272d Mon Sep 17 00:00:00 2001 From: JavierAndres Date: Mon, 15 Jun 2020 19:47:31 +0200 Subject: [PATCH 07/27] Fix AppName in examples to follow Spark naming convention (#548) --- .../MachineLearning/Sentiment/Program.cs | 2 +- examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs | 2 +- .../Microsoft.Spark.CSharp.Examples/Sql/Batch/Datasource.cs | 2 +- .../Sql/Batch/VectorDataFrameUdfs.cs | 2 +- .../Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Program.cs b/examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Program.cs index efb85e468..51f63078d 100644 --- a/examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Program.cs +++ b/examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Program.cs @@ -27,7 +27,7 @@ public void Run(string[] args) SparkSession spark = SparkSession .Builder() - .AppName(".NET for Apache Spark Sentiment Analysis") + .AppName("Sentiment Analysis using .NET for Apache Spark") .GetOrCreate(); // Read in and display Yelp reviews diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs index 6ef95eefa..fe57f7d1b 100644 --- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs +++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs @@ -26,7 +26,7 @@ public void Run(string[] args) SparkSession spark = SparkSession .Builder() - .AppName(".NET Spark SQL basic example") + .AppName("SQL basic example using .NET for Apache Spark") .Config("spark.some.config.option", "some-value") .GetOrCreate(); diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Datasource.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Datasource.cs index cf41eeceb..0945df791 100644 --- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Datasource.cs +++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Datasource.cs @@ -32,7 +32,7 @@ public void Run(string[] args) SparkSession spark = SparkSession .Builder() - .AppName(".NET Spark SQL Datasource example") + .AppName("SQL Datasource example using .NET for Apache Spark") .Config("spark.some.config.option", "some-value") .GetOrCreate(); diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorDataFrameUdfs.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorDataFrameUdfs.cs index 697301733..aafea7256 100644 --- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorDataFrameUdfs.cs +++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorDataFrameUdfs.cs @@ -31,7 +31,7 @@ public void Run(string[] args) .Builder() // Lower the shuffle partitions to speed up groupBy() operations. 
.Config("spark.sql.shuffle.partitions", "3") - .AppName(".NET Spark SQL VectorUdfs example") + .AppName("SQL VectorUdfs example using .NET for Apache Spark") .GetOrCreate(); DataFrame df = spark.Read().Schema("age INT, name STRING").Json(args[0]); diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs index 369cc3aff..2497d5ef3 100644 --- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs +++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs @@ -29,7 +29,7 @@ public void Run(string[] args) .Builder() // Lower the shuffle partitions to speed up groupBy() operations. .Config("spark.sql.shuffle.partitions", "3") - .AppName(".NET Spark SQL VectorUdfs example") + .AppName("SQL VectorUdfs example using .NET for Apache Spark") .GetOrCreate(); DataFrame df = spark.Read().Schema("age INT, name STRING").Json(args[0]); From ff7c7640d5aad9d53eb127f71e28d04f8cc8a88d Mon Sep 17 00:00:00 2001 From: Steve Suh Date: Fri, 19 Jun 2020 17:12:02 -0700 Subject: [PATCH 08/27] Microsoft.Spark.Extensions.DotNet.Interactive support latest changes to ISupportNuget (#554) --- NuGet.config | 2 ++ ...ft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj | 4 +--- .../PackageResolverTests.cs | 6 +++--- .../AssemblyKernelExtension.cs | 2 +- .../Microsoft.Spark.Extensions.DotNet.Interactive.csproj | 6 ++---- .../PackageResolver.cs | 8 ++++---- ...ageRestoreContextWrapper.cs => SupportNugetWrapper.cs} | 3 +-- .../Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj | 3 --- 8 files changed, 14 insertions(+), 20 deletions(-) rename src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/{PackageRestoreContextWrapper.cs => SupportNugetWrapper.cs} (82%) diff --git a/NuGet.config b/NuGet.config index 7b7b765e2..9d2866825 100644 --- a/NuGet.config +++ b/NuGet.config @@ -6,5 +6,7 @@ + + diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj index b0af57cf2..391582751 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj @@ -4,13 +4,11 @@ netcoreapp3.1 Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest false - - https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet3.1/nuget/v3/index.json - + diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs index 8fac95d7a..219c533ff 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs @@ -40,8 +40,8 @@ public void TestPackageResolver() }; var probingPaths = new List { new DirectoryInfo(packageRootPath) }; - var mockPackageRestoreContextWrapper = new Mock(); - mockPackageRestoreContextWrapper + var mockSupportNugetWrapper = new Mock(); + mockSupportNugetWrapper .SetupGet(m => m.ResolvedPackageReferences) .Returns(new ResolvedPackageReference[] { @@ -53,7 
+53,7 @@ public void TestPackageResolver() probingPaths) }); - var packageResolver = new PackageResolver(mockPackageRestoreContextWrapper.Object); + var packageResolver = new PackageResolver(mockSupportNugetWrapper.Object); IEnumerable actualFiles = packageResolver.GetFiles(tempDir.Path); string metadataFilePath = diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs index 80977c46e..2deff5869 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs @@ -27,7 +27,7 @@ public class AssemblyKernelExtension : IKernelExtension private const string TempDirEnvVar = "DOTNET_SPARK_EXTENSION_INTERACTIVE_TMPDIR"; private readonly PackageResolver _packageResolver = - new PackageResolver(new PackageRestoreContextWrapper()); + new PackageResolver(new SupportNugetWrapper()); /// /// Called by the Microsoft.DotNet.Interactive Assembly Extension Loader. diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj index 6966e3390..da330c762 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj @@ -6,14 +6,12 @@ Microsoft.Spark.Extensions.DotNet.Interactive true true - + NU5100;$(NoWarn) DotNet Interactive Extension for .NET for Apache Spark https://github.com/dotnet/spark/tree/master/docs/release-notes spark;dotnet;csharp;interactive;dotnet-interactive - - https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet3.1/nuget/v3/index.json @@ -22,7 +20,7 @@ - + all diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs index 4e91156ba..f9a76e43f 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs @@ -13,13 +13,13 @@ namespace Microsoft.Spark.Extensions.DotNet.Interactive { internal class PackageResolver { - private readonly PackageRestoreContextWrapper _packageRestoreContextWrapper; + private readonly SupportNugetWrapper _supportNugetWrapper; private readonly ConcurrentDictionary _filesCopied; private long _metadataCounter; - internal PackageResolver(PackageRestoreContextWrapper packageRestoreContextWrapper) + internal PackageResolver(SupportNugetWrapper supportNugetWrapper) { - _packageRestoreContextWrapper = packageRestoreContextWrapper; + _supportNugetWrapper = supportNugetWrapper; _filesCopied = new ConcurrentDictionary(); _metadataCounter = 0; } @@ -112,7 +112,7 @@ internal IEnumerable GetFiles(string writePath) private IEnumerable GetNewPackages() { IEnumerable packages = - _packageRestoreContextWrapper.ResolvedPackageReferences; + _supportNugetWrapper.ResolvedPackageReferences; foreach (ResolvedPackageReference package in packages) { IEnumerable files = diff --git 
a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageRestoreContextWrapper.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/SupportNugetWrapper.cs similarity index 82% rename from src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageRestoreContextWrapper.cs rename to src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/SupportNugetWrapper.cs index 259088d7a..489e39e94 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageRestoreContextWrapper.cs +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/SupportNugetWrapper.cs @@ -4,11 +4,10 @@ namespace Microsoft.Spark.Extensions.DotNet.Interactive { - internal class PackageRestoreContextWrapper + internal class SupportNugetWrapper { internal virtual IEnumerable ResolvedPackageReferences => ((ISupportNuget)KernelInvocationContext.Current.HandlingKernel) - .PackageRestoreContext .ResolvedPackageReferences; } } diff --git a/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj b/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj index 1be21a7ac..f18f41963 100644 --- a/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj +++ b/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj @@ -6,9 +6,6 @@ netcoreapp3.1 Microsoft.Spark.Worker true - - - https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet3.1/nuget/v3/index.json From 3701d2e9c3076d75c6dc7a9a71a229175cf39501 Mon Sep 17 00:00:00 2001 From: Laneser Date: Mon, 22 Jun 2020 01:17:22 +0800 Subject: [PATCH 09/27] Fix broken maven link in building/ubuntu-instructions.md (#558) --- docs/building/ubuntu-instructions.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/building/ubuntu-instructions.md b/docs/building/ubuntu-instructions.md index 8bb11b163..0e3dbdf40 100644 --- a/docs/building/ubuntu-instructions.md +++ b/docs/building/ubuntu-instructions.md @@ -35,14 +35,14 @@ If you already have all the pre-requisites, skip to the [build](ubuntu-instructi ```bash sudo update-alternatives --config java ``` - 3. Install **[Apache Maven 3.6.0+](https://maven.apache.org/download.cgi)** + 3. 
Install **[Apache Maven 3.6.3+](https://maven.apache.org/download.cgi)** - Run the following command: ```bash mkdir -p ~/bin/maven cd ~/bin/maven - wget https://www-us.apache.org/dist/maven/maven-3/3.6.0/binaries/apache-maven-3.6.0-bin.tar.gz - tar -xvzf apache-maven-3.6.0-bin.tar.gz - ln -s apache-maven-3.6.0 current + wget https://www-us.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz + tar -xvzf apache-maven-3.6.3-bin.tar.gz + ln -s apache-maven-3.6.3 current export M2_HOME=~/bin/maven/current export PATH=${M2_HOME}/bin:${PATH} source ~/.bashrc @@ -54,11 +54,11 @@ If you already have all the pre-requisites, skip to the [build](ubuntu-instructi 📙 Click to see sample mvn -version output ``` - Apache Maven 3.6.0 (97c98ec64a1fdfee7767ce5ffb20918da4f719f3; 2018-10-24T18:41:47Z) - Maven home: ~/bin/apache-maven-3.6.0 - Java version: 1.8.0_191, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-8-openjdk-amd64/jre - Default locale: en, platform encoding: UTF-8 - OS name: "linux", version: "4.4.0-17763-microsoft", arch: "amd64", family: "unix" + Apache Maven 3.6.3 (cecedd343002696d0abb50b32b541b8a6ba2883f) + Maven home: ~/bin/apache-maven-3.6.3 + Java version: 1.8.0_242, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-8-openjdk-amd64/jre + Default locale: en_US, platform encoding: ANSI_X3.4-1968 + OS name: "linux", version: "4.4.0-142-generic", arch: "amd64", family: "unix" ``` 4. Install **[Apache Spark 2.3+](https://spark.apache.org/downloads.html)** - Download [Apache Spark 2.3+](https://spark.apache.org/downloads.html) and extract it into a local folder (e.g., `~/bin/spark-2.3.2-bin-hadoop2.7`) From 6f835a5e0291ffed5ddcc449378fe76bfd722714 Mon Sep 17 00:00:00 2001 From: Laneser Date: Mon, 22 Jun 2020 11:17:03 +0800 Subject: [PATCH 10/27] Fix maven broken link in windows build doc (#560) --- docs/building/windows-instructions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/building/windows-instructions.md b/docs/building/windows-instructions.md index 84874a129..aad141b68 100644 --- a/docs/building/windows-instructions.md +++ b/docs/building/windows-instructions.md @@ -30,10 +30,10 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct 3. Install **[Java 1.8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html)** - Select the appropriate version for your operating system e.g., jdk-8u201-windows-x64.exe for Win x64 machine. - Install using the installer and verify you are able to run `java` from your command-line - 4. Install **[Apache Maven 3.6.0+](https://maven.apache.org/download.cgi)** - - Download [Apache Maven 3.6.0](http://mirror.metrocast.net/apache/maven/maven-3/3.6.0/binaries/apache-maven-3.6.0-bin.zip) - - Extract to a local directory e.g., `c:\bin\apache-maven-3.6.0\` - - Add Apache Maven to your [PATH environment variable](https://www.java.com/en/download/help/path.xml) e.g., `c:\bin\apache-maven-3.6.0\bin` + 4. Install **[Apache Maven 3.6.3+](https://maven.apache.org/download.cgi)** + - Download [Apache Maven 3.6.3](http://mirror.metrocast.net/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.zip) + - Extract to a local directory e.g., `c:\bin\apache-maven-3.6.3\` + - Add Apache Maven to your [PATH environment variable](https://www.java.com/en/download/help/path.xml) e.g., `c:\bin\apache-maven-3.6.3\bin` - Verify you are able to run `mvn` from your command-line 5. 
Install **[Apache Spark 2.3+](https://spark.apache.org/downloads.html)** - Download [Apache Spark 2.3+](https://spark.apache.org/downloads.html) and extract it into a local folder (e.g., `c:\bin\spark-2.3.2-bin-hadoop2.7\`) using [7-zip](https://www.7-zip.org/). From 7bb3dd1817095bb4685311be48bbf4ebab21739e Mon Sep 17 00:00:00 2001 From: Andrew Fogarty Date: Sun, 21 Jun 2020 23:56:49 -0700 Subject: [PATCH 11/27] Hyperspace Extension (#555) --- .../Constants.cs | 14 ++ .../HyperspaceFixture.cs | 32 ++++ .../HyperspaceTests.cs | 141 ++++++++++++++++++ .../Index/IndexConfigTests.cs | 86 +++++++++++ ...Spark.Extensions.Hyperspace.E2ETest.csproj | 13 ++ .../Hyperspace.cs | 113 ++++++++++++++ .../HyperspaceSparkSessionExtensions.cs | 55 +++++++ .../Index/Builder.cs | 74 +++++++++ .../Index/IndexConfig.cs | 92 ++++++++++++ ...crosoft.Spark.Extensions.Hyperspace.csproj | 13 ++ .../Microsoft.Spark.E2ETest.csproj | 1 + src/csharp/Microsoft.Spark.sln | 14 ++ .../Interop/Internal/Scala/Seq.cs | 41 +++++ .../Microsoft.Spark/Microsoft.Spark.csproj | 2 + 14 files changed, 691 insertions(+) create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Constants.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceFixture.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceTests.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Index/IndexConfigTests.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Hyperspace.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/HyperspaceSparkSessionExtensions.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Index/Builder.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Index/IndexConfig.cs create mode 100644 src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Microsoft.Spark.Extensions.Hyperspace.csproj create mode 100644 src/csharp/Microsoft.Spark/Interop/Internal/Scala/Seq.cs diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Constants.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Constants.cs new file mode 100644 index 000000000..969dd85f1 --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Constants.cs @@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest +{ + /// + /// Constants related to the Hyperspace test suite. + /// + internal class Constants + { + public const string HyperspaceTestContainerName = "Hyperspace Tests"; + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceFixture.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceFixture.cs new file mode 100644 index 000000000..8578c77f0 --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceFixture.cs @@ -0,0 +1,32 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using Microsoft.Spark.E2ETest; +using Xunit; + +namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest +{ + public class HyperspaceFixture + { + public HyperspaceFixture() + { + Environment.SetEnvironmentVariable( + SparkFixture.EnvironmentVariableNames.ExtraSparkSubmitArgs, + "--packages com.microsoft.hyperspace:hyperspace-core_2.11:0.1.0"); + + SparkFixture = new SparkFixture(); + } + + public SparkFixture SparkFixture { get; private set; } + } + + [CollectionDefinition(Constants.HyperspaceTestContainerName)] + public class HyperspaceTestCollection : ICollectionFixture + { + // This class has no code, and is never created. Its purpose is simply + // to be the place to apply [CollectionDefinition] and all the + // ICollectionFixture<> interfaces. + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceTests.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceTests.cs new file mode 100644 index 000000000..12e8bca60 --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceTests.cs @@ -0,0 +1,141 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using Microsoft.Spark.E2ETest.Utils; +using Microsoft.Spark.Extensions.Hyperspace.Index; +using Microsoft.Spark.Sql; +using Microsoft.Spark.UnitTest.TestUtils; +using Xunit; + +namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest +{ + /// + /// Test suite for Hyperspace index management APIs. + /// + [Collection(Constants.HyperspaceTestContainerName)] + public class HyperspaceTests : IDisposable + { + private readonly SparkSession _spark; + private readonly TemporaryDirectory _hyperspaceSystemDirectory; + private readonly Hyperspace _hyperspace; + + // Fields needed for sample DataFrame. + private readonly DataFrame _sampleDataFrame; + private readonly string _sampleIndexName; + private readonly IndexConfig _sampleIndexConfig; + + public HyperspaceTests(HyperspaceFixture fixture) + { + _spark = fixture.SparkFixture.Spark; + _hyperspaceSystemDirectory = new TemporaryDirectory(); + _spark.Conf().Set("spark.hyperspace.system.path", _hyperspaceSystemDirectory.Path); + _hyperspace = new Hyperspace(_spark); + + _sampleDataFrame = _spark.Read() + .Option("header", true) + .Option("delimiter", ";") + .Csv("Resources\\people.csv"); + _sampleIndexName = "sample_dataframe"; + _sampleIndexConfig = new IndexConfig(_sampleIndexName, new[] { "job" }, new[] { "name" }); + _hyperspace.CreateIndex(_sampleDataFrame, _sampleIndexConfig); + } + + /// + /// Clean up the Hyperspace system directory in between tests. + /// + public void Dispose() + { + _hyperspaceSystemDirectory.Dispose(); + } + + /// + /// Test the method signatures for all Hyperspace APIs. + /// + [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)] + public void TestSignatures() + { + // Indexes API. + Assert.IsType(_hyperspace.Indexes()); + + // Delete and Restore APIs. + _hyperspace.DeleteIndex(_sampleIndexName); + _hyperspace.RestoreIndex(_sampleIndexName); + + // Refresh API. + _hyperspace.RefreshIndex(_sampleIndexName); + + // Cancel API. + Assert.Throws(() => _hyperspace.Cancel(_sampleIndexName)); + + // Explain API. 
+ _hyperspace.Explain(_sampleDataFrame, true); + _hyperspace.Explain(_sampleDataFrame, true, s => Console.WriteLine(s)); + + // Delete and Vacuum APIs. + _hyperspace.DeleteIndex(_sampleIndexName); + _hyperspace.VacuumIndex(_sampleIndexName); + + // Enable and disable Hyperspace. + Assert.IsType(_spark.EnableHyperspace()); + Assert.IsType(_spark.DisableHyperspace()); + Assert.IsType(_spark.IsHyperspaceEnabled()); + } + + /// + /// Test E2E functionality of index CRUD APIs. + /// + [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)] + public void TestIndexCreateAndDelete() + { + // Should be one active index. + DataFrame indexes = _hyperspace.Indexes(); + Assert.Equal(1, indexes.Count()); + Assert.Equal(_sampleIndexName, indexes.SelectExpr("name").First()[0]); + Assert.Equal(States.Active, indexes.SelectExpr("state").First()[0]); + + // Delete the index then verify it has been deleted. + _hyperspace.DeleteIndex(_sampleIndexName); + indexes = _hyperspace.Indexes(); + Assert.Equal(1, indexes.Count()); + Assert.Equal(States.Deleted, indexes.SelectExpr("state").First()[0]); + + // Restore the index to active state and verify it is back. + _hyperspace.RestoreIndex(_sampleIndexName); + indexes = _hyperspace.Indexes(); + Assert.Equal(1, indexes.Count()); + Assert.Equal(States.Active, indexes.SelectExpr("state").First()[0]); + + // Delete and vacuum the index, then verify it is gone. + _hyperspace.DeleteIndex(_sampleIndexName); + _hyperspace.VacuumIndex(_sampleIndexName); + Assert.Equal(0, _hyperspace.Indexes().Count()); + } + + /// + /// Test that the explain API generates the expected string. + /// + [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)] + public void TestExplainAPI() + { + // Run a query that hits the index. + DataFrame queryDataFrame = _sampleDataFrame + .Where("job == 'Developer'") + .Select("name"); + + string explainString = string.Empty; + _hyperspace.Explain(queryDataFrame, true, s => explainString = s); + Assert.False(string.IsNullOrEmpty(explainString)); + } + + /// + /// Index states used in testing. + /// + private static class States + { + public const string Active = "ACTIVE"; + public const string Deleted = "DELETED"; + } + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Index/IndexConfigTests.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Index/IndexConfigTests.cs new file mode 100644 index 000000000..b96f85432 --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Index/IndexConfigTests.cs @@ -0,0 +1,86 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using System.Linq; +using Microsoft.Spark.E2ETest.Utils; +using Microsoft.Spark.Extensions.Hyperspace.Index; +using Xunit; + +namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest.Index +{ + /// + /// Test suite for Hyperspace IndexConfig tests. + /// + [Collection(Constants.HyperspaceTestContainerName)] + public class IndexConfigTests + { + public IndexConfigTests(HyperspaceFixture fixture) + { + } + + /// + /// Test the method signatures for IndexConfig and IndexConfigBuilder APIs. 
+ /// + [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)] + public void TestSignatures() + { + string indexName = "testIndexName"; + var indexConfig = new IndexConfig(indexName, new[] { "Id" }, new string[] { }); + Assert.IsType(indexConfig.IndexName); + Assert.IsType>(indexConfig.IndexedColumns); + Assert.IsType>(indexConfig.IncludedColumns); + Assert.IsType(IndexConfig.Builder()); + Assert.IsType(indexConfig.Equals(indexConfig)); + Assert.IsType(indexConfig.GetHashCode()); + Assert.IsType(indexConfig.ToString()); + + Builder builder = IndexConfig.Builder(); + Assert.IsType(builder); + Assert.IsType(builder.IndexName("indexName")); + Assert.IsType(builder.IndexBy("indexed1", "indexed2")); + Assert.IsType(builder.Include("included1")); + Assert.IsType(builder.Create()); + } + + /// + /// Test creating an IndexConfig using its class constructor. + /// + [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)] + public void TestIndexConfigConstructor() + { + string indexName = "indexName"; + string[] indexedColumns = { "idx1" }; + string[] includedColumns = { "inc1", "inc2", "inc3" }; + var config = new IndexConfig(indexName, indexedColumns, includedColumns); + + // Validate that the config was built correctly. + Assert.Equal(indexName, config.IndexName); + Assert.Equal(indexedColumns, config.IndexedColumns); + Assert.Equal(includedColumns, config.IncludedColumns); + } + + /// + /// Test creating an IndexConfig using the builder pattern. + /// + [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)] + public void TestIndexConfigBuilder() + { + string indexName = "indexName"; + string[] indexedColumns = { "idx1" }; + string[] includedColumns = { "inc1", "inc2", "inc3" }; + + Builder builder = IndexConfig.Builder(); + builder.IndexName(indexName); + builder.Include(includedColumns[0], includedColumns[1], includedColumns[2]); + builder.IndexBy(indexedColumns[0]); + + // Validate that the config was built correctly. + IndexConfig config = builder.Create(); + Assert.Equal(indexName, config.IndexName); + Assert.Equal(indexedColumns, config.IndexedColumns); + Assert.Equal(includedColumns, config.IncludedColumns); + } + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj new file mode 100644 index 000000000..231022e4b --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj @@ -0,0 +1,13 @@ + + + + netcoreapp3.1 + false + + + + + + + + diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Hyperspace.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Hyperspace.cs new file mode 100644 index 000000000..13509779d --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Hyperspace.cs @@ -0,0 +1,113 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using Microsoft.Spark.Extensions.Hyperspace.Index; +using Microsoft.Spark.Interop.Ipc; +using Microsoft.Spark.Sql; + +namespace Microsoft.Spark.Extensions.Hyperspace +{ + /// + /// .Net for Spark binding for Hyperspace index management APIs. 
+ /// + public class Hyperspace : IJvmObjectReferenceProvider + { + private static readonly string s_hyperspaceClassName = + "com.microsoft.hyperspace.Hyperspace"; + private readonly SparkSession _spark; + private readonly IJvmBridge _jvmBridge; + private readonly JvmObjectReference _jvmObject; + + public Hyperspace(SparkSession spark) + { + _spark = spark; + _jvmBridge = ((IJvmObjectReferenceProvider)spark).Reference.Jvm; + _jvmObject = _jvmBridge.CallConstructor(s_hyperspaceClassName, spark); + } + + JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; + + /// + /// Collect all the index metadata. + /// + /// All index metadata as a . + public DataFrame Indexes() => + new DataFrame((JvmObjectReference)_jvmObject.Invoke("indexes")); + + /// + /// Create index. + /// + /// The DataFrame object to build index on. + /// The configuration of index to be created. + public void CreateIndex(DataFrame df, IndexConfig indexConfig) => + _jvmObject.Invoke("createIndex", df, indexConfig); + + /// + /// Soft deletes the index with given index name. + /// + /// The name of index to delete. + public void DeleteIndex(string indexName) => _jvmObject.Invoke("deleteIndex", indexName); + + /// + /// Restores index with given index name. + /// + /// Name of the index to restore. + public void RestoreIndex(string indexName) => _jvmObject.Invoke("restoreIndex", indexName); + + /// + /// Does hard delete of indexes marked as DELETED. + /// + /// Name of the index to restore. + public void VacuumIndex(string indexName) => _jvmObject.Invoke("vacuumIndex", indexName); + + /// + /// Update indexes for the latest version of the data. + /// + /// Name of the index to refresh. + public void RefreshIndex(string indexName) => _jvmObject.Invoke("refreshIndex", indexName); + + /// + /// Cancel api to bring back index from an inconsistent state to the last known stable + /// state. + /// + /// E.g. if index fails during creation, in CREATING state. + /// The index will not allow any index modifying operations unless a cancel is called. + /// + /// Note: Cancel from VACUUMING state will move it forward to DOESNOTEXIST + /// state. + /// + /// Note: If no previous stable state exists, cancel will move it to DOESNOTEXIST + /// state. + /// + /// Name of the index to cancel. + public void Cancel(string indexName) => _jvmObject.Invoke("cancel", indexName); + + /// + /// Explains how indexes will be applied to the given dataframe. + /// + /// dataFrame + /// Flag to enable verbose mode. + public void Explain(DataFrame df, bool verbose) => + Explain(df, verbose, s => Console.WriteLine(s)); + + /// + /// Explains how indexes will be applied to the given dataframe. + /// + /// dataFrame + /// Flag to enable verbose mode. + /// Function to redirect output of explain. + public void Explain(DataFrame df, bool verbose, Action redirectFunc) + { + var explainString = (string)_jvmBridge.CallStaticJavaMethod( + "com.microsoft.hyperspace.index.plananalysis.PlanAnalyzer", + "explainString", + df, + _spark, + Indexes(), + verbose); + redirectFunc(explainString); + } + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/HyperspaceSparkSessionExtensions.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/HyperspaceSparkSessionExtensions.cs new file mode 100644 index 000000000..3c43f369c --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/HyperspaceSparkSessionExtensions.cs @@ -0,0 +1,55 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.Spark.Interop; +using Microsoft.Spark.Interop.Ipc; +using Microsoft.Spark.Sql; + +namespace Microsoft.Spark.Extensions.Hyperspace +{ + /// + /// Hyperspace-specific extension methods on . + /// + public static class HyperspaceSparkSessionExtensions + { + private static readonly string s_pythonUtilsClassName = + "com.microsoft.hyperspace.util.PythonUtils"; + + /// + /// Plug in Hyperspace-specific rules. + /// + /// A spark session that does not contain Hyperspace-specific rules. + /// + /// A spark session that contains Hyperspace-specific rules. + public static SparkSession EnableHyperspace(this SparkSession session) => + new SparkSession( + (JvmObjectReference)SparkEnvironment.JvmBridge.CallStaticJavaMethod( + s_pythonUtilsClassName, + "enableHyperspace", + session)); + + /// + /// Plug out Hyperspace-specific rules. + /// + /// A spark session that contains Hyperspace-specific rules. + /// A spark session that does not contain Hyperspace-specific rules. + public static SparkSession DisableHyperspace(this SparkSession session) => + new SparkSession( + (JvmObjectReference)SparkEnvironment.JvmBridge.CallStaticJavaMethod( + s_pythonUtilsClassName, + "disableHyperspace", + session)); + + /// + /// Checks if Hyperspace is enabled or not. + /// + /// + /// True if Hyperspace is enabled or false otherwise. + public static bool IsHyperspaceEnabled(this SparkSession session) => + (bool)SparkEnvironment.JvmBridge.CallStaticJavaMethod( + s_pythonUtilsClassName, + "isHyperspaceEnabled", + session); + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Index/Builder.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Index/Builder.cs new file mode 100644 index 000000000..4623de3e7 --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Index/Builder.cs @@ -0,0 +1,74 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.Spark.Interop.Ipc; + +namespace Microsoft.Spark.Extensions.Hyperspace.Index +{ + /// + /// Builder for . + /// + public sealed class Builder : IJvmObjectReferenceProvider + { + private readonly JvmObjectReference _jvmObject; + + internal Builder(JvmObjectReference jvmObject) + { + _jvmObject = jvmObject; + } + + JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; + + /// + /// Updates index name for . + /// + /// Index name for the . + /// An object with updated indexname. + public Builder IndexName(string indexName) + { + _jvmObject.Invoke("indexName", indexName); + return this; + } + + /// + /// Updates column names for . + /// + /// Note: API signature supports passing one or more argument. + /// + /// Indexed column for the + /// . + /// Indexed columns for the + /// . + /// An object with updated indexed columns. + public Builder IndexBy(string indexedColumn, params string[] indexedColumns) + { + _jvmObject.Invoke("indexBy", indexedColumn, indexedColumns); + return this; + } + + /// + /// Updates included columns for . + /// + /// Note: API signature supports passing one or more argument. + /// + /// Included column for . + /// + /// Included columns for . + /// + /// An object with updated included columns. 
+ public Builder Include(string includedColumn, params string[] includedColumns) + { + _jvmObject.Invoke("include", includedColumn, includedColumns); + return this; + } + + /// + /// Creates IndexConfig from supplied index name, indexed columns and included columns + /// to . + /// + /// An object. + public IndexConfig Create() => + new IndexConfig((JvmObjectReference)_jvmObject.Invoke("create")); + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Index/IndexConfig.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Index/IndexConfig.cs new file mode 100644 index 000000000..030dda2ca --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Index/IndexConfig.cs @@ -0,0 +1,92 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using Microsoft.Spark.Interop; +using Microsoft.Spark.Interop.Internal.Scala; +using Microsoft.Spark.Interop.Ipc; + +namespace Microsoft.Spark.Extensions.Hyperspace.Index +{ + /// + /// specifies the configuration of an index. + /// + public sealed class IndexConfig : IJvmObjectReferenceProvider + { + private static readonly string s_className = "com.microsoft.hyperspace.index.IndexConfig"; + private readonly JvmObjectReference _jvmObject; + + /// + /// specifies the configuration of an index. + /// + /// Index name. + /// Columns from which an index is created. + public IndexConfig(string indexName, IEnumerable indexedColumns) + : this(indexName, indexedColumns, new string[] { }) + { + } + + /// + /// specifies the configuration of an index. + /// + /// Index name. + /// Columns from which an index is created. + /// Columns to be included in the index. + public IndexConfig( + string indexName, + IEnumerable indexedColumns, + IEnumerable includedColumns) + { + IndexName = indexName; + IndexedColumns = new List(indexedColumns); + IncludedColumns = new List(includedColumns); + + _jvmObject = (JvmObjectReference)SparkEnvironment.JvmBridge.CallStaticJavaMethod( + s_className, + "apply", + IndexName, + IndexedColumns, + IncludedColumns); + } + + /// + /// specifies the configuration of an index. + /// + /// JVM object reference. + internal IndexConfig(JvmObjectReference jvmObject) + { + _jvmObject = jvmObject; + IndexName = (string)_jvmObject.Invoke("indexName"); + IndexedColumns = new List( + new Seq((JvmObjectReference)_jvmObject.Invoke("indexedColumns"))); + IncludedColumns = new List( + new Seq((JvmObjectReference)_jvmObject.Invoke("includedColumns"))); + } + + JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; + + public string IndexName { get; private set; } + + public List IndexedColumns { get; private set; } + + public List IncludedColumns { get; private set; } + + /// + /// Creates new for constructing an + /// . + /// + /// An object. 
+ public static Builder Builder() => + new Builder( + (JvmObjectReference)SparkEnvironment.JvmBridge.CallStaticJavaMethod( + s_className, + "builder")); + + public override bool Equals(object that) => (bool)_jvmObject.Invoke("equals", that); + + public override int GetHashCode() => (int)_jvmObject.Invoke("hashCode"); + + public override string ToString() => (string)_jvmObject.Invoke("toString"); + } +} diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Microsoft.Spark.Extensions.Hyperspace.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Microsoft.Spark.Extensions.Hyperspace.csproj new file mode 100644 index 000000000..d85c62f71 --- /dev/null +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Microsoft.Spark.Extensions.Hyperspace.csproj @@ -0,0 +1,13 @@ + + + + netstandard2.0;netstandard2.1 + true + true + + + + + + + diff --git a/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj b/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj index e03519853..7a6240ecc 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj +++ b/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj @@ -12,6 +12,7 @@ + diff --git a/src/csharp/Microsoft.Spark.sln b/src/csharp/Microsoft.Spark.sln index 73047bff3..75c071377 100644 --- a/src/csharp/Microsoft.Spark.sln +++ b/src/csharp/Microsoft.Spark.sln @@ -39,6 +39,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Spark.Extensions. EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest", "Extensions\Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest\Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj", "{7BDE09ED-04B3-41B2-A466-3D6F7225291E}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.Spark.Extensions.Hyperspace", "Extensions\Microsoft.Spark.Extensions.Hyperspace\Microsoft.Spark.Extensions.Hyperspace.csproj", "{70DDA4E9-1195-4A29-9AA1-96A8223A6D4F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Spark.Extensions.Hyperspace.E2ETest", "Extensions\Microsoft.Spark.Extensions.Hyperspace.E2ETest\Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj", "{C6019E44-C777-4DE2-B70E-EA025B7D044D}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -101,6 +105,14 @@ Global {7BDE09ED-04B3-41B2-A466-3D6F7225291E}.Debug|Any CPU.Build.0 = Debug|Any CPU {7BDE09ED-04B3-41B2-A466-3D6F7225291E}.Release|Any CPU.ActiveCfg = Release|Any CPU {7BDE09ED-04B3-41B2-A466-3D6F7225291E}.Release|Any CPU.Build.0 = Release|Any CPU + {70DDA4E9-1195-4A29-9AA1-96A8223A6D4F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {70DDA4E9-1195-4A29-9AA1-96A8223A6D4F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {70DDA4E9-1195-4A29-9AA1-96A8223A6D4F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {70DDA4E9-1195-4A29-9AA1-96A8223A6D4F}.Release|Any CPU.Build.0 = Release|Any CPU + {C6019E44-C777-4DE2-B70E-EA025B7D044D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C6019E44-C777-4DE2-B70E-EA025B7D044D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C6019E44-C777-4DE2-B70E-EA025B7D044D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C6019E44-C777-4DE2-B70E-EA025B7D044D}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -113,6 +125,8 @@ Global {47652C7D-B076-4FD9-98AC-959E38BE18E3} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} 
{9C32014D-8C0C-40F1-9ABA-C3BF19687E5C} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} {7BDE09ED-04B3-41B2-A466-3D6F7225291E} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} + {70DDA4E9-1195-4A29-9AA1-96A8223A6D4F} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} + {C6019E44-C777-4DE2-B70E-EA025B7D044D} = {71A19F75-8279-40AB-BEA0-7D4B153FC416} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {FD15FFDB-EA1B-436F-841D-3386DDF94538} diff --git a/src/csharp/Microsoft.Spark/Interop/Internal/Scala/Seq.cs b/src/csharp/Microsoft.Spark/Interop/Internal/Scala/Seq.cs new file mode 100644 index 000000000..9d9ed3bc1 --- /dev/null +++ b/src/csharp/Microsoft.Spark/Interop/Internal/Scala/Seq.cs @@ -0,0 +1,41 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections; +using System.Collections.Generic; +using Microsoft.Spark.Interop.Ipc; + +namespace Microsoft.Spark.Interop.Internal.Scala +{ + /// + /// Limited read-only implementation of Scala Seq[T] so that Seq objects can be read + /// into POCO collection types such as List. + /// + /// + internal sealed class Seq : IJvmObjectReferenceProvider, IEnumerable + { + private readonly JvmObjectReference _jvmObject; + + internal Seq(JvmObjectReference jvmObject) + { + _jvmObject = jvmObject; + } + + JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + public int Size => (int)_jvmObject.Invoke("size"); + + public IEnumerator GetEnumerator() + { + for (int i = 0; i < Size; ++i) + { + yield return Apply(i); + } + } + + public T Apply(int index) => (T)_jvmObject.Invoke("apply", index); + } +} diff --git a/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj b/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj index 050a43493..2cddc5627 100644 --- a/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj +++ b/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj @@ -19,6 +19,8 @@ + + From 1cd9ccab7ccf62f4483004a8245e42c77cf9ef61 Mon Sep 17 00:00:00 2001 From: Niharika Dutta Date: Mon, 22 Jun 2020 15:55:49 -0700 Subject: [PATCH 12/27] UDF bug fix caused by ThreadStatic BroadcastVariablesRegistry (#551) --- .../UdfTests/UdfSimpleTypesTests.cs | 25 +++++++++++++++++++ src/csharp/Microsoft.Spark/Broadcast.cs | 13 +++++----- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/csharp/Microsoft.Spark.E2ETest/UdfTests/UdfSimpleTypesTests.cs b/src/csharp/Microsoft.Spark.E2ETest/UdfTests/UdfSimpleTypesTests.cs index e4c4cabb9..92422c205 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/UdfTests/UdfSimpleTypesTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/UdfTests/UdfSimpleTypesTests.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Threading; using Microsoft.Spark.Sql; using Microsoft.Spark.Sql.Types; using Xunit; @@ -166,5 +167,29 @@ public void TestUdfWithReturnAsTimestampType() } } } + + /// + /// Test to validate UDFs defined in separate threads work. + /// + [Fact] + public void TestUdfWithMultipleThreads() + { + try + { + void DefineUdf() => Udf(str => str); + + // Define a UDF in the main thread. + Udf(str => str); + + // Verify a UDF can be defined in a separate thread. 
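+ // (With the previous [ThreadStatic] registry in JvmBroadcastRegistry, the field initializer
+ // only ran on the first thread, so defining a UDF on a new thread could fail; the
+ // ThreadLocal<T> change below addresses this.)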
+ Thread t = new Thread(DefineUdf); + t.Start(); + t.Join(); + } + catch (Exception) + { + Assert.True(false); + } + } } } diff --git a/src/csharp/Microsoft.Spark/Broadcast.cs b/src/csharp/Microsoft.Spark/Broadcast.cs index 20ae5c869..2791ec546 100644 --- a/src/csharp/Microsoft.Spark/Broadcast.cs +++ b/src/csharp/Microsoft.Spark/Broadcast.cs @@ -4,6 +4,7 @@ using System.IO; using System.Runtime.Serialization; using System.Runtime.Serialization.Formatters.Binary; +using System.Threading; using Microsoft.Spark.Interop; using Microsoft.Spark.Interop.Ipc; using Microsoft.Spark.Services; @@ -261,28 +262,26 @@ internal static void Remove(long bid) /// internal static class JvmBroadcastRegistry { - [ThreadStatic] - private static readonly List s_jvmBroadcastVariables = - new List(); + private static ThreadLocal> s_jvmBroadcastVariables = + new ThreadLocal>(() => new List()); /// /// Adds a JVMObjectReference object of type to the list. /// /// JVMObjectReference of the Broadcast variable internal static void Add(JvmObjectReference broadcastJvmObject) => - s_jvmBroadcastVariables.Add(broadcastJvmObject); + s_jvmBroadcastVariables.Value.Add(broadcastJvmObject); /// /// Clears s_jvmBroadcastVariables of all the JVMObjectReference objects of type /// . /// - internal static void Clear() => s_jvmBroadcastVariables.Clear(); + internal static void Clear() => s_jvmBroadcastVariables.Value.Clear(); /// /// Returns the static member s_jvmBroadcastVariables. /// /// A list of all broadcast objects of type - internal static List GetAll() => s_jvmBroadcastVariables; + internal static List GetAll() => s_jvmBroadcastVariables.Value; } } - From 29ad2cb4f4d9c83c8e97e41c614ed115706425a2 Mon Sep 17 00:00:00 2001 From: Steve Suh Date: Tue, 23 Jun 2020 10:03:28 -0700 Subject: [PATCH 13/27] Expose DataStreamWriter.ForeachBatch API (#549) --- .../Sql/Streaming/DataStreamWriterTests.cs | 73 +++++ .../Microsoft.Spark.UnitTest/CallbackTests.cs | 239 +++++++++++++++ .../Microsoft.Spark.UnitTest/SparkFixture.cs | 1 - .../TestUtils/XunitConsoleOutHelper.cs | 34 +++ .../CommandExecutorTests.cs | 1 - .../DaemonWorkerTests.cs | 4 - .../TaskRunnerTests.cs | 3 - .../Interop/Ipc/CallbackConnection.cs | 280 ++++++++++++++++++ .../Interop/Ipc/CallbackServer.cs | 256 ++++++++++++++++ .../Ipc/ForeachBatchCallbackHandler.cs | 36 +++ .../Interop/Ipc/ICallbackHandler.cs | 16 + .../Interop/SparkEnvironment.cs | 9 + .../Network/DefaultSocketWrapper.cs | 5 + .../Microsoft.Spark/Network/ISocketWrapper.cs | 5 + .../Sql/Streaming/DataStreamWriter.cs | 28 ++ .../Sql/Streaming/StreamingQuery.cs | 10 + .../spark/api/dotnet/CallbackClient.scala | 73 +++++ .../spark/api/dotnet/CallbackConnection.scala | 112 +++++++ .../spark/api/dotnet/DotnetBackend.scala | 46 ++- .../api/dotnet/DotnetBackendHandler.scala | 29 +- .../spark/api/dotnet/DotnetException.scala | 13 + .../spark/api/dotnet/CallbackClient.scala | 73 +++++ .../spark/api/dotnet/CallbackConnection.scala | 112 +++++++ .../spark/api/dotnet/DotnetBackend.scala | 46 ++- .../api/dotnet/DotnetBackendHandler.scala | 29 +- .../spark/api/dotnet/DotnetException.scala | 13 + .../sql/api/dotnet/DotnetForeachBatch.scala | 34 +++ .../spark/api/dotnet/CallbackClient.scala | 73 +++++ .../spark/api/dotnet/CallbackConnection.scala | 112 +++++++ .../spark/api/dotnet/DotnetBackend.scala | 46 ++- .../api/dotnet/DotnetBackendHandler.scala | 29 +- .../spark/api/dotnet/DotnetException.scala | 13 + .../sql/api/dotnet/DotnetForeachBatch.scala | 34 +++ 33 files changed, 1731 insertions(+), 156 deletions(-) create mode 
100644 src/csharp/Microsoft.Spark.UnitTest/CallbackTests.cs create mode 100644 src/csharp/Microsoft.Spark.UnitTest/TestUtils/XunitConsoleOutHelper.cs create mode 100644 src/csharp/Microsoft.Spark/Interop/Ipc/CallbackConnection.cs create mode 100644 src/csharp/Microsoft.Spark/Interop/Ipc/CallbackServer.cs create mode 100644 src/csharp/Microsoft.Spark/Interop/Ipc/ForeachBatchCallbackHandler.cs create mode 100644 src/csharp/Microsoft.Spark/Interop/Ipc/ICallbackHandler.cs create mode 100644 src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala create mode 100644 src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala create mode 100644 src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala create mode 100644 src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala create mode 100644 src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala create mode 100644 src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala create mode 100644 src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala create mode 100644 src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala create mode 100644 src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala create mode 100644 src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala create mode 100644 src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs index 15c2a22a7..0983035f4 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Threading; using Microsoft.Spark.E2ETest.Utils; using Microsoft.Spark.Sql; using Microsoft.Spark.Sql.Streaming; @@ -67,6 +68,69 @@ public void TestSignaturesV2_3_X() Assert.IsType(dsw.Trigger(Trigger.Once())); } + [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)] + public void TestForeachBatch() + { + // Temporary folder to put our test stream input. + using var srcTempDirectory = new TemporaryDirectory(); + // Temporary folder to write ForeachBatch output. + using var dstTempDirectory = new TemporaryDirectory(); + + Func outerUdf = Udf(i => i + 100); + + // id column: [0, 1, ..., 9] + WriteCsv(0, 10, Path.Combine(srcTempDirectory.Path, "input1.csv")); + + DataStreamWriter dsw = _spark + .ReadStream() + .Schema("id INT") + .Csv(srcTempDirectory.Path) + .WriteStream() + .ForeachBatch((df, id) => + { + Func innerUdf = Udf(i => i + 200); + df.Select(outerUdf(innerUdf(Col("id")))) + .Write() + .Csv(Path.Combine(dstTempDirectory.Path, id.ToString())); + }); + + StreamingQuery sq = dsw.Start(); + + // Process until all available data in the source has been processed and committed + // to the ForeachBatch sink. + sq.ProcessAllAvailable(); + + // Add new file to the source path. The spark stream will read any new files + // added to the source path. 
+ // id column: [10, 11, ..., 19] + WriteCsv(10, 10, Path.Combine(srcTempDirectory.Path, "input2.csv")); + + // Process until all available data in the source has been processed and committed + // to the ForeachBatch sink. + sq.ProcessAllAvailable(); + sq.Stop(); + + // Verify folders in the destination path. + string[] csvPaths = + Directory.GetDirectories(dstTempDirectory.Path).OrderBy(s => s).ToArray(); + var expectedPaths = new string[] + { + Path.Combine(dstTempDirectory.Path, "0"), + Path.Combine(dstTempDirectory.Path, "1"), + }; + Assert.True(expectedPaths.SequenceEqual(csvPaths)); + + // Read the generated csv paths and verify contents. + DataFrame df = _spark + .Read() + .Schema("id INT") + .Csv(csvPaths[0], csvPaths[1]) + .Sort("id"); + + IEnumerable actualIds = df.Collect().Select(r => r.GetAs("id")); + Assert.True(Enumerable.Range(300, 20).SequenceEqual(actualIds)); + } + [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)] public void TestForeach() { @@ -200,6 +264,15 @@ private void TestAndValidateForeach( foreachWriterOutputDF.Collect().Select(r => r.Values)); } + private void WriteCsv(int start, int count, string path) + { + using var streamWriter = new StreamWriter(path); + foreach (int i in Enumerable.Range(start, count)) + { + streamWriter.WriteLine(i); + } + } + [Serializable] private class TestForeachWriter : IForeachWriter { diff --git a/src/csharp/Microsoft.Spark.UnitTest/CallbackTests.cs b/src/csharp/Microsoft.Spark.UnitTest/CallbackTests.cs new file mode 100644 index 000000000..04266e814 --- /dev/null +++ b/src/csharp/Microsoft.Spark.UnitTest/CallbackTests.cs @@ -0,0 +1,239 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
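+// These tests cover callback id registration, the CallbackServer socket loop, and the
+// CallbackConnection handler protocol (END_OF_STREAM / DOTNET_EXCEPTION_THROWN handling).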
+ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Spark.Interop.Ipc; +using Microsoft.Spark.Network; +using Moq; +using Xunit; + +namespace Microsoft.Spark.UnitTest +{ + [Collection("Spark Unit Tests")] + public class CallbackTests + { + private readonly Mock _mockJvm; + + public CallbackTests(SparkFixture fixture) + { + _mockJvm = fixture.MockJvm; + } + + [Fact] + public async Task TestCallbackIds() + { + int numToRegister = 100; + var callbackServer = new CallbackServer(_mockJvm.Object, false); + var callbackHandler = new TestCallbackHandler(); + + var ids = new ConcurrentBag(); + var tasks = new List(); + for (int i = 0; i < numToRegister; ++i) + { + tasks.Add( + Task.Run(() => ids.Add(callbackServer.RegisterCallback(callbackHandler)))); + } + + await Task.WhenAll(tasks); + + IOrderedEnumerable actualIds = ids.OrderBy(i => i); + IEnumerable expectedIds = Enumerable.Range(1, numToRegister); + Assert.True(expectedIds.SequenceEqual(actualIds)); + } + + [Fact] + public void TestCallbackServer() + { + var callbackServer = new CallbackServer(_mockJvm.Object, false); + var callbackHandler = new TestCallbackHandler(); + + callbackHandler.Id = callbackServer.RegisterCallback(callbackHandler); + Assert.Equal(1, callbackHandler.Id); + + using ISocketWrapper callbackSocket = SocketFactory.CreateSocket(); + callbackServer.Run(callbackSocket); + + int connectionNumber = 10; + for (int i = 0; i < connectionNumber; ++i) + { + var ipEndpoint = (IPEndPoint)callbackSocket.LocalEndPoint; + ISocketWrapper clientSocket = SocketFactory.CreateSocket(); + clientSocket.Connect(ipEndpoint.Address, ipEndpoint.Port); + + WriteAndReadTestData(clientSocket, callbackHandler, i, new CancellationToken()); + } + + Assert.Equal(connectionNumber, callbackServer.CurrentNumConnections); + + IOrderedEnumerable actualValues = callbackHandler.Inputs.OrderBy(i => i); + IEnumerable expectedValues = Enumerable + .Range(0, connectionNumber) + .Select(i => callbackHandler.Apply(i)) + .OrderBy(i => i); + Assert.True(expectedValues.SequenceEqual(actualValues)); + } + + [Fact] + public void TestCallbackHandlers() + { + var tokenSource = new CancellationTokenSource(); + var callbackHandlersDict = new ConcurrentDictionary(); + int inputToHandler = 1; + { + // Test CallbackConnection using a ICallbackHandler that runs + // normally without error. + var callbackHandler = new TestCallbackHandler + { + Id = 1 + }; + callbackHandlersDict[callbackHandler.Id] = callbackHandler; + TestCallbackConnection( + callbackHandlersDict, + callbackHandler, + inputToHandler, + tokenSource.Token); + Assert.Single(callbackHandler.Inputs); + Assert.Equal( + callbackHandler.Apply(inputToHandler), + callbackHandler.Inputs.First()); + } + { + // Test CallbackConnection using a ICallbackHandler that + // throws an exception. + var callbackHandler = new ThrowsExceptionHandler + { + Id = 2 + }; + callbackHandlersDict[callbackHandler.Id] = callbackHandler; + TestCallbackConnection( + callbackHandlersDict, + callbackHandler, + inputToHandler, + tokenSource.Token); + Assert.Empty(callbackHandler.Inputs); + } + { + // Test CallbackConnection when cancellation has been requested for the token. 
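+ // (CallbackConnection.Run registers Stop() on the token, so with a cancelled token the
+ // connection shuts down without invoking the handler; the assertions below verify this.)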
+ tokenSource.Cancel(); + var callbackHandler = new TestCallbackHandler + { + Id = 3 + }; + callbackHandlersDict[callbackHandler.Id] = callbackHandler; + TestCallbackConnection( + callbackHandlersDict, + callbackHandler, + inputToHandler, + tokenSource.Token); + Assert.Empty(callbackHandler.Inputs); + } + } + + private void TestCallbackConnection( + ConcurrentDictionary callbackHandlersDict, + ITestCallbackHandler callbackHandler, + int inputToHandler, + CancellationToken token) + { + using ISocketWrapper serverListener = SocketFactory.CreateSocket(); + serverListener.Listen(); + + var ipEndpoint = (IPEndPoint)serverListener.LocalEndPoint; + ISocketWrapper clientSocket = SocketFactory.CreateSocket(); + clientSocket.Connect(ipEndpoint.Address, ipEndpoint.Port); + + var callbackConnection = new CallbackConnection(0, clientSocket, callbackHandlersDict); + Task.Run(() => callbackConnection.Run(token)); + + using ISocketWrapper serverSocket = serverListener.Accept(); + WriteAndReadTestData(serverSocket, callbackHandler, inputToHandler, token); + } + + private void WriteAndReadTestData( + ISocketWrapper socket, + ITestCallbackHandler callbackHandler, + int inputToHandler, + CancellationToken token) + { + Stream inputStream = socket.InputStream; + Stream outputStream = socket.OutputStream; + + SerDe.Write(outputStream, (int)CallbackFlags.CALLBACK); + SerDe.Write(outputStream, callbackHandler.Id); + SerDe.Write(outputStream, sizeof(int)); + SerDe.Write(outputStream, inputToHandler); + SerDe.Write(outputStream, (int)CallbackFlags.END_OF_STREAM); + outputStream.Flush(); + + if (token.IsCancellationRequested) + { + Assert.Throws(() => SerDe.ReadInt32(inputStream)); + } + else + { + int callbackFlag = SerDe.ReadInt32(inputStream); + if (callbackFlag == (int)CallbackFlags.DOTNET_EXCEPTION_THROWN) + { + string exceptionMessage = SerDe.ReadString(inputStream); + Assert.False(string.IsNullOrEmpty(exceptionMessage)); + Assert.Contains(callbackHandler.ExceptionMessage, exceptionMessage); + } + else + { + Assert.Equal((int)CallbackFlags.END_OF_STREAM, callbackFlag); + } + } + } + + private class TestCallbackHandler : ICallbackHandler, ITestCallbackHandler + { + public void Run(Stream inputStream) => Inputs.Add(Apply(SerDe.ReadInt32(inputStream))); + + public ConcurrentBag Inputs { get; } = new ConcurrentBag(); + + public int Id { get; set; } + + public bool Throws { get; } = false; + + public string ExceptionMessage => throw new NotImplementedException(); + + public int Apply(int i) => 10 * i; + } + + private class ThrowsExceptionHandler : ICallbackHandler, ITestCallbackHandler + { + public void Run(Stream inputStream) => throw new Exception(ExceptionMessage); + + public ConcurrentBag Inputs { get; } = new ConcurrentBag(); + + public int Id { get; set; } + + public bool Throws { get; } = true; + + public string ExceptionMessage { get; } = "Dotnet Callback Handler Exception Message"; + + public int Apply(int i) => throw new NotImplementedException(); + } + + private interface ITestCallbackHandler + { + ConcurrentBag Inputs { get; } + + int Id { get; set; } + + bool Throws { get; } + + string ExceptionMessage { get; } + + int Apply(int i); + } + } +} diff --git a/src/csharp/Microsoft.Spark.UnitTest/SparkFixture.cs b/src/csharp/Microsoft.Spark.UnitTest/SparkFixture.cs index 02f2c8b3b..06c9a3fe2 100644 --- a/src/csharp/Microsoft.Spark.UnitTest/SparkFixture.cs +++ b/src/csharp/Microsoft.Spark.UnitTest/SparkFixture.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. 
using System; -using System.IO; using Microsoft.Spark.Interop; using Microsoft.Spark.Interop.Ipc; using Moq; diff --git a/src/csharp/Microsoft.Spark.UnitTest/TestUtils/XunitConsoleOutHelper.cs b/src/csharp/Microsoft.Spark.UnitTest/TestUtils/XunitConsoleOutHelper.cs new file mode 100644 index 000000000..f71630671 --- /dev/null +++ b/src/csharp/Microsoft.Spark.UnitTest/TestUtils/XunitConsoleOutHelper.cs @@ -0,0 +1,34 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.IO; +using Xunit.Abstractions; + +namespace Microsoft.Spark.UnitTest.TestUtils +{ + // Tests can subclass this to get Console output to display when using + // xUnit testing framework. + // Workaround found at https://github.com/microsoft/vstest/issues/799 + public class XunitConsoleOutHelper : IDisposable + { + private readonly ITestOutputHelper _output; + private readonly TextWriter _originalOut; + private readonly TextWriter _textWriter; + + public XunitConsoleOutHelper(ITestOutputHelper output) + { + _output = output; + _originalOut = Console.Out; + _textWriter = new StringWriter(); + Console.SetOut(_textWriter); + } + + public void Dispose() + { + _output.WriteLine(_textWriter.ToString()); + Console.SetOut(_originalOut); + } + } +} diff --git a/src/csharp/Microsoft.Spark.Worker.UnitTest/CommandExecutorTests.cs b/src/csharp/Microsoft.Spark.Worker.UnitTest/CommandExecutorTests.cs index 589e0ea0b..8978e321e 100644 --- a/src/csharp/Microsoft.Spark.Worker.UnitTest/CommandExecutorTests.cs +++ b/src/csharp/Microsoft.Spark.Worker.UnitTest/CommandExecutorTests.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using System.Collections; using System.Collections.Generic; using System.IO; diff --git a/src/csharp/Microsoft.Spark.Worker.UnitTest/DaemonWorkerTests.cs b/src/csharp/Microsoft.Spark.Worker.UnitTest/DaemonWorkerTests.cs index 5fac38035..d5800bb1e 100644 --- a/src/csharp/Microsoft.Spark.Worker.UnitTest/DaemonWorkerTests.cs +++ b/src/csharp/Microsoft.Spark.Worker.UnitTest/DaemonWorkerTests.cs @@ -3,14 +3,10 @@ // See the LICENSE file in the project root for more information. using System; -using System.Collections; using System.Collections.Generic; -using System.IO; using System.Net; using System.Threading.Tasks; -using Microsoft.Spark.Interop.Ipc; using Microsoft.Spark.Network; -using Razorvine.Pickle; using Xunit; namespace Microsoft.Spark.Worker.UnitTest diff --git a/src/csharp/Microsoft.Spark.Worker.UnitTest/TaskRunnerTests.cs b/src/csharp/Microsoft.Spark.Worker.UnitTest/TaskRunnerTests.cs index 436a45940..86a254f4b 100644 --- a/src/csharp/Microsoft.Spark.Worker.UnitTest/TaskRunnerTests.cs +++ b/src/csharp/Microsoft.Spark.Worker.UnitTest/TaskRunnerTests.cs @@ -2,13 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-using System.Collections; using System.Collections.Generic; using System.Net; using System.Threading.Tasks; -using Microsoft.Spark.Interop.Ipc; using Microsoft.Spark.Network; -using Razorvine.Pickle; using Xunit; namespace Microsoft.Spark.Worker.UnitTest diff --git a/src/csharp/Microsoft.Spark/Interop/Ipc/CallbackConnection.cs b/src/csharp/Microsoft.Spark/Interop/Ipc/CallbackConnection.cs new file mode 100644 index 000000000..512318429 --- /dev/null +++ b/src/csharp/Microsoft.Spark/Interop/Ipc/CallbackConnection.cs @@ -0,0 +1,280 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Buffers.Binary; +using System.Collections.Concurrent; +using System.IO; +using System.Threading; +using Microsoft.Spark.Network; +using Microsoft.Spark.Services; + +namespace Microsoft.Spark.Interop.Ipc +{ + /// + /// CallbackConnection is used to process the callback communication between + /// Dotnet and the JVM. It uses a TCP socket to communicate with the JVM side + /// and the socket is expected to be reused. + /// + internal sealed class CallbackConnection + { + private static readonly ILoggerService s_logger = + LoggerServiceFactory.GetLogger(typeof(CallbackConnection)); + + private readonly ISocketWrapper _socket; + + /// + /// Keeps track of all s by its Id. This is accessed + /// by the and the . + /// + private readonly ConcurrentDictionary _callbackHandlers; + + private volatile bool _isRunning = false; + + private int _numCallbacksRun = 0; + + internal CallbackConnection( + long connectionId, + ISocketWrapper socket, + ConcurrentDictionary callbackHandlers) + { + ConnectionId = connectionId; + _socket = socket; + _callbackHandlers = callbackHandlers; + + s_logger.LogInfo( + $"[{ConnectionId}] Connected with RemoteEndPoint: {socket.RemoteEndPoint}"); + } + + private enum ConnectionStatus + { + /// + /// Connection is normal. + /// + OK, + + /// + /// Socket is closed by the JVM. + /// + SOCKET_CLOSED, + + /// + /// Request to close connection. + /// + REQUEST_CLOSE + } + + internal long ConnectionId { get; } + + /// + /// Run and start processing the callback connection. + /// + /// Cancellation token used to stop the connection. + internal void Run(CancellationToken token) + { + _isRunning = true; + Stream inputStream = _socket.InputStream; + Stream outputStream = _socket.OutputStream; + + token.Register(() => Stop()); + + try + { + while (_isRunning) + { + ConnectionStatus connectionStatus = + ProcessStream(inputStream, outputStream, out bool readComplete); + + if (connectionStatus == ConnectionStatus.OK) + { + outputStream.Flush(); + + ++_numCallbacksRun; + + // If the socket is not read through completely, then it cannot be reused. + if (!readComplete) + { + _isRunning = false; + + // Wait for server to complete to avoid 'connection reset' exception. 
+ s_logger.LogInfo( + $"[{ConnectionId}] Sleep 500 millisecond to close socket."); + Thread.Sleep(500); + } + } + else if (connectionStatus == ConnectionStatus.REQUEST_CLOSE) + { + _isRunning = false; + s_logger.LogInfo( + $"[{ConnectionId}] Request to close connection received."); + } + else + { + _isRunning = false; + s_logger.LogWarn($"[{ConnectionId}] Socket is closed by JVM."); + } + } + } + catch (Exception e) + { + _isRunning = false; + s_logger.LogError($"[{ConnectionId}] Exiting with exception: {e}"); + } + finally + { + try + { + _socket.Dispose(); + } + catch (Exception e) + { + s_logger.LogWarn($"[{ConnectionId}] Exception while closing socket {e}"); + } + + s_logger.LogInfo( + $"[{ConnectionId}] Finished running {_numCallbacksRun} callback(s)."); + } + } + + private void Stop() + { + _isRunning = false; + s_logger.LogInfo($"[{ConnectionId}] Stopping CallbackConnection."); + } + + /// + /// Process the input and output streams. + /// + /// The input stream. + /// The output stream. + /// True if stream is read completely, false otherwise. + /// The connection status. + private ConnectionStatus ProcessStream( + Stream inputStream, + Stream outputStream, + out bool readComplete) + { + readComplete = false; + + try + { + byte[] requestFlagBytes = SerDe.ReadBytes(inputStream, sizeof(int)); + // For socket stream, read on the stream returns 0, which + // SerDe.ReadBytes() returns as null to denote the stream is closed. + if (requestFlagBytes == null) + { + return ConnectionStatus.SOCKET_CLOSED; + } + + // Check value of the initial request. Expected values are: + // - CallbackFlags.CLOSE + // - CallbackFlags.CALLBACK + int requestFlag = BinaryPrimitives.ReadInt32BigEndian(requestFlagBytes); + if (requestFlag == (int)CallbackFlags.CLOSE) { + return ConnectionStatus.REQUEST_CLOSE; + } + else if (requestFlag != (int)CallbackFlags.CALLBACK) + { + throw new Exception( + string.Format( + "Unexpected callback flag received. Expected: {0}, Received: {1}.", + CallbackFlags.CALLBACK, + requestFlag)); + } + + // Use callback id to get the registered handler. + int callbackId = SerDe.ReadInt32(inputStream); + if (!_callbackHandlers.TryGetValue( + callbackId, + out ICallbackHandler callbackHandler)) + { + throw new Exception($"Unregistered callback id: {callbackId}"); + } + + s_logger.LogInfo( + string.Format( + "[{0}] Received request for callback id: {1}, callback handler: {2}", + ConnectionId, + callbackId, + callbackHandler)); + + // Save contents of callback handler data to be used later. + using var callbackDataStream = + new MemoryStream(SerDe.ReadBytes(inputStream, SerDe.ReadInt32(inputStream))); + + // Check the end of stream. + int endOfStream = SerDe.ReadInt32(inputStream); + if (endOfStream == (int)CallbackFlags.END_OF_STREAM) + { + s_logger.LogDebug($"[{ConnectionId}] Received END_OF_STREAM signal."); + + // Run callback handler. + callbackHandler.Run(callbackDataStream); + + SerDe.Write(outputStream, (int)CallbackFlags.END_OF_STREAM); + readComplete = true; + } + else + { + // This may happen when the input data is not read completely. + s_logger.LogWarn( + $"[{ConnectionId}] Unexpected end of stream: {endOfStream}."); + + // Write flag to indicate the connection should be closed. 
+ SerDe.Write(outputStream, (int)CallbackFlags.CLOSE); + } + + return ConnectionStatus.OK; + } + catch (Exception e) + { + s_logger.LogError($"[{ConnectionId}] ProcessStream() failed with exception: {e}"); + + try + { + SerDe.Write(outputStream, (int)CallbackFlags.DOTNET_EXCEPTION_THROWN); + SerDe.Write(outputStream, e.ToString()); + } + catch (IOException) + { + // JVM closed the socket. + } + catch (Exception ex) + { + s_logger.LogError( + $"[{ConnectionId}] Writing exception to stream failed with exception: {ex}"); + } + + throw; + } + } + } + + /// + /// Enums with which the Dotnet CallbackConnection communicates with + /// the JVM CallbackConnection. + /// + internal enum CallbackFlags : int + { + /// + /// Flag to indicate connection should be closed. + /// + CLOSE = -1, + + /// + /// Flag to indiciate callback should be called. + /// + CALLBACK = -2, + + /// + /// Flag to indicate an exception thrown from dotnet. + /// + DOTNET_EXCEPTION_THROWN = -3, + + /// + /// Flag to indicate end of stream. + /// + END_OF_STREAM = -4 + } +} diff --git a/src/csharp/Microsoft.Spark/Interop/Ipc/CallbackServer.cs b/src/csharp/Microsoft.Spark/Interop/Ipc/CallbackServer.cs new file mode 100644 index 000000000..ef6c0407a --- /dev/null +++ b/src/csharp/Microsoft.Spark/Interop/Ipc/CallbackServer.cs @@ -0,0 +1,256 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Concurrent; +using System.Net; +using System.Threading; +using Microsoft.Spark.Network; +using Microsoft.Spark.Services; + +namespace Microsoft.Spark.Interop.Ipc +{ + /// + /// CallbackServer services callback requests from the JVM. + /// + internal sealed class CallbackServer + { + private static readonly ILoggerService s_logger = + LoggerServiceFactory.GetLogger(typeof(CallbackServer)); + + private readonly IJvmBridge _jvm; + + /// + /// Keeps track of all s by its Id. This is accessed + /// by the and the + /// running in the worker threads. + /// + private readonly ConcurrentDictionary _callbackHandlers = + new ConcurrentDictionary(); + + /// + /// Keeps track of all objects identified by its + /// . The main thread creates a + /// each time it receives a new socket connection + /// from the JVM side and inserts it into . Each worker + /// thread calls and removes the connection + /// once this call is finished. will not return + /// unless the needs to be closed. + /// Also, is used to bound the number of worker threads + /// since it gives you the total number of active s. + /// + private readonly ConcurrentDictionary _connections = + new ConcurrentDictionary(); + + /// + /// Each worker thread picks up a CallbackConnection from _waitingConnections + /// and runs it. + /// + private readonly BlockingCollection _waitingConnections = + new BlockingCollection(); + + /// + /// A used to notify threads that operations + /// should be canceled. + /// + private readonly CancellationTokenSource _tokenSource = new CancellationTokenSource(); + + /// + /// Counter used to generate a unique id when registering a . 
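+ /// (The counter is advanced with Interlocked.Increment in RegisterCallback, so ids stay unique across threads.)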
+ /// + private int _callbackCounter = 0; + + private bool _isRunning = false; + + private ISocketWrapper _listener; + + internal int CurrentNumConnections => _connections.Count; + + internal CallbackServer(IJvmBridge jvm, bool run = true) + { + AppDomain.CurrentDomain.ProcessExit += (s, e) => Shutdown(); + _jvm = jvm; + + if (run) + { + Run(); + } + } + + /// + /// Produce a unique id and register a with it. + /// + /// The handler to register. + /// A unique id associated with the handler. + internal int RegisterCallback(ICallbackHandler callbackHandler) + { + int callbackId = Interlocked.Increment(ref _callbackCounter); + _callbackHandlers[callbackId] = callbackHandler; + + return callbackId; + } + + /// + /// Runs the callback server. + /// + /// The listening socket. + internal void Run(ISocketWrapper listener) + { + if (_isRunning) + { + s_logger.LogWarn("CallbackServer is already running."); + return; + } + + s_logger.LogInfo($"Starting CallbackServer."); + _isRunning = true; + + try + { + _listener = listener; + _listener.Listen(); + + // Communicate with the JVM the callback server's address and port. + var localEndPoint = (IPEndPoint)_listener.LocalEndPoint; + _jvm.CallStaticJavaMethod( + "DotnetHandler", + "connectCallback", + localEndPoint.Address.ToString(), + localEndPoint.Port); + + s_logger.LogInfo($"Started CallbackServer on {localEndPoint}"); + + // Start accepting connections from JVM. + new Thread(() => StartServer(_listener)) + { + IsBackground = true + }.Start(); + } + catch (Exception e) + { + s_logger.LogError($"CallbackServer exiting with exception: {e}"); + Shutdown(); + } + } + + /// + /// Runs the callback server. + /// + private void Run() + { + Run(SocketFactory.CreateSocket()); + } + + /// + /// Starts listening to any connection from JVM. + /// + /// + private void StartServer(ISocketWrapper listener) + { + try + { + long connectionId = 1; + int numWorkerThreads = 0; + + while (_isRunning) + { + ISocketWrapper socket = listener.Accept(); + var connection = + new CallbackConnection(connectionId, socket, _callbackHandlers); + + _waitingConnections.Add(connection); + _connections[connectionId] = connection; + ++connectionId; + + int numConnections = CurrentNumConnections; + + // Start worker thread until there are at least as many worker threads + // as there are CallbackConnections. CallbackConnections are expected + // to stay open and reuse the socket to service repeated callback + // requests. However, if there is an issue with a connection, then + // CallbackConnection.Run will return, freeing up extra worker threads + // to service any _waitingConnections. + // + // For example, + // Assume there were 5 worker threads, each servicing a CallbackConnection + // (5 total healthy connections). If 2 CallbackConnection sockets closed + // unexpectedly, then there would be 5 worker threads and 3 healthy + // connections. If a new connection request arrived, then the + // CallbackConnection would be added to the _waitingConnections collection + // and no new worker threads would be started (2 worker threads are already + // waiting to take CallbackConnections from _waitingConnections). 
+ while (numWorkerThreads < numConnections) + { + new Thread(RunWorkerThread) + { + IsBackground = true + }.Start(); + ++numWorkerThreads; + } + + s_logger.LogInfo( + $"Pool snapshot: [NumThreads:{numWorkerThreads}], " + + $"[NumConnections:{numConnections}]"); + } + } + catch (Exception e) + { + s_logger.LogError($"StartServer() exits with exception: {e}"); + Shutdown(); + } + } + + /// + /// is called for each worker thread when it starts. + /// doesn't return (except for the error cases), and + /// keeps pulling from and runs the retrieved + /// . + /// + private void RunWorkerThread() + { + try + { + while (_isRunning) + { + if (_waitingConnections.TryTake( + out CallbackConnection connection, + Timeout.Infinite)) + { + // The connection will only return when the connection is closing + // (via CancellationToken) or there are error cases. + connection.Run(_tokenSource.Token); + + // Assume the connection is in a bad state, and do not reuse it. + // Remove it from _connections list to prevent the server thread from + // creating more threads than needed. + _connections.TryRemove(connection.ConnectionId, out CallbackConnection _); + } + } + } + catch (Exception e) + { + s_logger.LogError($"RunWorkerThread() exits with an exception: {e}"); + Shutdown(); + } + } + + /// + /// Shuts down the by canceling any running threads + /// and disposing of resources. + /// + private void Shutdown() + { + s_logger.LogInfo("Shutting down CallbackServer"); + + _tokenSource.Cancel(); + _waitingConnections.Dispose(); + _connections.Clear(); + _callbackHandlers.Clear(); + _listener?.Dispose(); + _isRunning = false; + + _jvm.CallStaticJavaMethod("DotnetHandler", "closeCallback"); + } + } +} diff --git a/src/csharp/Microsoft.Spark/Interop/Ipc/ForeachBatchCallbackHandler.cs b/src/csharp/Microsoft.Spark/Interop/Ipc/ForeachBatchCallbackHandler.cs new file mode 100644 index 000000000..9ba6ee24a --- /dev/null +++ b/src/csharp/Microsoft.Spark/Interop/Ipc/ForeachBatchCallbackHandler.cs @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.IO; +using Microsoft.Spark.Sql; +using Microsoft.Spark.Sql.Streaming; + +namespace Microsoft.Spark.Interop.Ipc +{ + /// + /// callback handler. + /// + internal sealed class ForeachBatchCallbackHandler : ICallbackHandler + { + private readonly IJvmBridge _jvm; + + private readonly Action _func; + + internal ForeachBatchCallbackHandler(IJvmBridge jvm, Action func) + { + _jvm = jvm; + _func = func; + } + + public void Run(Stream inputStream) + { + var batchDf = + new DataFrame(new JvmObjectReference(SerDe.ReadString(inputStream), _jvm)); + long batchId = SerDe.ReadInt64(inputStream); + + _func(batchDf, batchId); + } + } +} diff --git a/src/csharp/Microsoft.Spark/Interop/Ipc/ICallbackHandler.cs b/src/csharp/Microsoft.Spark/Interop/Ipc/ICallbackHandler.cs new file mode 100644 index 000000000..0bd280b01 --- /dev/null +++ b/src/csharp/Microsoft.Spark/Interop/Ipc/ICallbackHandler.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.IO; + +namespace Microsoft.Spark.Interop.Ipc +{ + /// + /// Interface for handling callbacks between the JVM and Dotnet. 
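+ /// Implementations read their callback arguments from the provided stream;
+ /// ForeachBatchCallbackHandler above is one example, deserializing a DataFrame
+ /// reference and a batch id before invoking the user function.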
+ /// + internal interface ICallbackHandler + { + void Run(Stream inputStream); + } +} diff --git a/src/csharp/Microsoft.Spark/Interop/SparkEnvironment.cs b/src/csharp/Microsoft.Spark/Interop/SparkEnvironment.cs index f2523d065..95fa5d586 100644 --- a/src/csharp/Microsoft.Spark/Interop/SparkEnvironment.cs +++ b/src/csharp/Microsoft.Spark/Interop/SparkEnvironment.cs @@ -84,5 +84,14 @@ internal static IConfigurationService ConfigurationService s_configurationService = value; } } + + private static CallbackServer s_callbackServer; + internal static CallbackServer CallbackServer + { + get + { + return s_callbackServer ??= new CallbackServer(JvmBridge); + } + } } } diff --git a/src/csharp/Microsoft.Spark/Network/DefaultSocketWrapper.cs b/src/csharp/Microsoft.Spark/Network/DefaultSocketWrapper.cs index 8647a14cb..296bb67df 100644 --- a/src/csharp/Microsoft.Spark/Network/DefaultSocketWrapper.cs +++ b/src/csharp/Microsoft.Spark/Network/DefaultSocketWrapper.cs @@ -137,5 +137,10 @@ private Stream CreateStream(string bufferSizeEnvVarName) /// Returns the local endpoint. /// public EndPoint LocalEndPoint => _innerSocket.LocalEndPoint; + + /// + /// Returns the remote endpoint. + /// + public EndPoint RemoteEndPoint => _innerSocket.RemoteEndPoint; } } diff --git a/src/csharp/Microsoft.Spark/Network/ISocketWrapper.cs b/src/csharp/Microsoft.Spark/Network/ISocketWrapper.cs index c29d7637c..1dbba3c47 100644 --- a/src/csharp/Microsoft.Spark/Network/ISocketWrapper.cs +++ b/src/csharp/Microsoft.Spark/Network/ISocketWrapper.cs @@ -50,5 +50,10 @@ internal interface ISocketWrapper : IDisposable /// Returns the local endpoint. /// EndPoint LocalEndPoint { get; } + + /// + /// Returns the remote endpoint. + /// + EndPoint RemoteEndPoint { get; } } } diff --git a/src/csharp/Microsoft.Spark/Sql/Streaming/DataStreamWriter.cs b/src/csharp/Microsoft.Spark/Sql/Streaming/DataStreamWriter.cs index 2cf752459..f371b0665 100644 --- a/src/csharp/Microsoft.Spark/Sql/Streaming/DataStreamWriter.cs +++ b/src/csharp/Microsoft.Spark/Sql/Streaming/DataStreamWriter.cs @@ -2,7 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Collections.Generic; +using Microsoft.Spark.Interop; using Microsoft.Spark.Interop.Ipc; using Microsoft.Spark.Sql.Types; using Microsoft.Spark.Utils; @@ -205,6 +207,32 @@ public DataStreamWriter Foreach(IForeachWriter writer) return this; } + /// + /// Sets the output of the streaming query to be processed using the provided + /// function. This is supported only in the micro-batch execution modes (that + /// is, when the trigger is not continuous). In every micro-batch, the provided + /// function will be called in every micro-batch with (i) the output rows as a + /// and (ii) the batch identifier. The batchId can be used + /// to deduplicate and transactionally write the output (that is, the provided + /// Dataset) to external systems. The output is guaranteed + /// to exactly same for the same batchId (assuming all operations are deterministic + /// in the query). 
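+ /// An illustrative sketch, mirroring the E2E test added in this change (output path is hypothetical):
+ /// df.WriteStream().ForeachBatch((batchDf, batchId) => batchDf.Write().Csv($"/output/{batchId}")).Start();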
+ /// + /// The function to apply to the DataFrame + /// This DataStreamWriter object + [Since(Versions.V2_4_0)] + public DataStreamWriter ForeachBatch(Action func) + { + int callbackId = SparkEnvironment.CallbackServer.RegisterCallback( + new ForeachBatchCallbackHandler(_jvmObject.Jvm, func)); + _jvmObject.Jvm.CallStaticJavaMethod( + "org.apache.spark.sql.api.dotnet.DotnetForeachBatchHelper", + "callForeachBatch", + this, + callbackId); + return this; + } + /// /// Helper function to add given key/value pair as a new option. /// diff --git a/src/csharp/Microsoft.Spark/Sql/Streaming/StreamingQuery.cs b/src/csharp/Microsoft.Spark/Sql/Streaming/StreamingQuery.cs index 7e948c076..645ca0e0c 100644 --- a/src/csharp/Microsoft.Spark/Sql/Streaming/StreamingQuery.cs +++ b/src/csharp/Microsoft.Spark/Sql/Streaming/StreamingQuery.cs @@ -46,6 +46,16 @@ public sealed class StreamingQuery : IJvmObjectReferenceProvider public bool AwaitTermination(long timeoutMs) => (bool)_jvmObject.Invoke("awaitTermination", timeoutMs); + /// + /// Blocks until all available data in the source has been processed and committed to the + /// sink. This method is intended for testing. Note that in the case of continually + /// arriving data, this method may block forever. Additionally, this method is only + /// guaranteed to block until data that has been synchronously appended data to a + /// `org.apache.spark.sql.execution.streaming.Source` prior to invocation. + /// (i.e. `getOffset` must immediately reflect the addition). + /// + public void ProcessAllAvailable() => _jvmObject.Invoke("processAllAvailable"); + /// /// Stops the execution of this query if it is running. This method blocks until the /// threads performing execution stop. diff --git a/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala new file mode 100644 index 000000000..0026d78df --- /dev/null +++ b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.api.dotnet + +import java.io.DataOutputStream + +import org.apache.spark.internal.Logging + +import scala.collection.mutable.Queue + +/** + * CallbackClient is used to communicate with the Dotnet CallbackServer. + * The client manages and maintains a pool of open CallbackConnections. + * Any callback request is delegated to a new CallbackConnection or + * unused CallbackConnection. 
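+ * Requests go through send(callbackId, writeBody), which serializes the body with the
+ * supplied function and reuses a pooled connection when one is available.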
+ * @param address The address of the Dotnet CallbackServer + * @param port The port of the Dotnet CallbackServer + */ +class CallbackClient(address: String, port: Int) extends Logging { + private[this] val connectionPool: Queue[CallbackConnection] = Queue[CallbackConnection]() + + private[this] var isShutdown: Boolean = false + + final def send( + callbackId: Int, + writeBody: DataOutputStream => Unit): Unit = + getOrCreateConnection() match { + case Some(connection) => + try { + connection.send(callbackId, writeBody) + } catch { + case e: Exception => + logError(s"Error calling callback [callback id = $callbackId].", e) + connection.close() + throw e + } + case None => throw new Exception("Unable to get or create connection.") + } + + private def getOrCreateConnection(): Option[CallbackConnection] = synchronized { + if (isShutdown) { + logInfo("Cannot get or create connection while client is shutdown.") + return None + } + + if (connectionPool.nonEmpty) { + return Some(connectionPool.dequeue()) + } + + Some(new CallbackConnection(address, port)) + } + + private def addConnection(connection: CallbackConnection): Unit = synchronized { + assert(connection != null) + connectionPool.enqueue(connection) + } + + def shutdown(): Unit = synchronized { + if (isShutdown) { + logInfo("Shutdown called, but already shutdown.") + return + } + + logInfo("Shutting down.") + connectionPool.foreach(_.close) + connectionPool.clear + isShutdown = true + } +} diff --git a/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala new file mode 100644 index 000000000..36726181e --- /dev/null +++ b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.api.dotnet + +import java.io.{ByteArrayOutputStream, Closeable, DataInputStream, DataOutputStream} +import java.net.Socket + +import org.apache.spark.internal.Logging + +/** + * CallbackConnection is used to process the callback communication + * between the JVM and Dotnet. It uses a TCP socket to communicate with + * the Dotnet CallbackServer and the socket is expected to be reused. 
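+ * Each send writes CALLBACK, the callback id, a length-prefixed body, then END_OF_STREAM,
+ * and expects END_OF_STREAM (or DOTNET_EXCEPTION_THROWN) back from the .NET side.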
+ * @param address The address of the Dotnet CallbackServer + * @param port The port of the Dotnet CallbackServer + */ +class CallbackConnection(address: String, port: Int) extends Logging { + private[this] val socket: Socket = new Socket(address, port) + private[this] val inputStream: DataInputStream = new DataInputStream(socket.getInputStream) + private[this] val outputStream: DataOutputStream = new DataOutputStream(socket.getOutputStream) + + def send( + callbackId: Int, + writeBody: DataOutputStream => Unit): Unit = { + logInfo(s"Calling callback [callback id = $callbackId] ...") + + try { + SerDe.writeInt(outputStream, CallbackFlags.CALLBACK) + SerDe.writeInt(outputStream, callbackId) + + val byteArrayOutputStream = new ByteArrayOutputStream() + writeBody(new DataOutputStream(byteArrayOutputStream)) + SerDe.writeInt(outputStream, byteArrayOutputStream.size) + byteArrayOutputStream.writeTo(outputStream); + } catch { + case e: Exception => { + throw new Exception("Error writing to stream.", e) + } + } + + logInfo(s"Signaling END_OF_STREAM.") + try { + SerDe.writeInt(outputStream, CallbackFlags.END_OF_STREAM) + outputStream.flush() + + val endOfStreamResponse = readFlag(inputStream) + endOfStreamResponse match { + case CallbackFlags.END_OF_STREAM => + logInfo(s"Received END_OF_STREAM signal. Calling callback [callback id = $callbackId] successful.") + case _ => { + throw new Exception(s"Error verifying end of stream. Expected: ${CallbackFlags.END_OF_STREAM}, " + + s"Received: $endOfStreamResponse") + } + } + } catch { + case e: Exception => { + throw new Exception("Error while verifying end of stream.", e) + } + } + } + + def close(): Unit = { + try { + SerDe.writeInt(outputStream, CallbackFlags.CLOSE) + outputStream.flush() + } catch { + case e: Exception => logInfo("Unable to send close to .NET callback server.", e) + } + + close(socket) + close(outputStream) + close(inputStream) + } + + private def close(s: Socket): Unit = { + try { + assert(s != null) + s.close() + } catch { + case e: Exception => logInfo("Unable to close socket.", e) + } + } + + private def close(c: Closeable): Unit = { + try { + assert(c != null) + c.close() + } catch { + case e: Exception => logInfo("Unable to close closeable.", e) + } + } + + private def readFlag(inputStream: DataInputStream): Int = { + val callbackFlag = SerDe.readInt(inputStream) + if (callbackFlag == CallbackFlags.DOTNET_EXCEPTION_THROWN) { + val exceptionMessage = SerDe.readString(inputStream) + throw new DotnetException(exceptionMessage) + } + callbackFlag + } + + private object CallbackFlags { + val CLOSE: Int = -1 + val CALLBACK: Int = -2 + val DOTNET_EXCEPTION_THROWN: Int = -3 + val END_OF_STREAM: Int = -4 + } +} \ No newline at end of file diff --git a/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala index 45b3cd5a4..002945bb8 100644 --- a/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala +++ b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala @@ -6,9 +6,8 @@ package org.apache.spark.api.dotnet -import java.io.DataOutputStream -import java.net.{InetSocketAddress, Socket} -import java.util.concurrent.{BlockingQueue, LinkedBlockingQueue, TimeUnit} +import java.net.InetSocketAddress +import java.util.concurrent.TimeUnit import io.netty.bootstrap.ServerBootstrap import io.netty.channel.nio.NioEventLoopGroup @@ 
-50,7 +49,6 @@ class DotnetBackend extends Logging { // lengthFieldLength = 4 // lengthAdjustment = 0 // initialBytesToStrip = 4, i.e. strip out the length field itself - // new LengthFieldBasedFrameDecoder(Integer.MAX_VALUE, 0, 4, 0, 4)) new LengthFieldBasedFrameDecoder(Integer.MAX_VALUE, 0, 4, 0, 4)) .addLast("decoder", new ByteArrayDecoder()) .addLast("handler", new DotnetBackendHandler(self)) @@ -81,30 +79,26 @@ class DotnetBackend extends Logging { bootstrap = null // Send close to .NET callback server. - logInfo("Requesting to close all call back sockets") - var socket: Socket = null - do { - socket = DotnetBackend.callbackSockets.poll() - if (socket != null) { - try { - val dos = new DataOutputStream(socket.getOutputStream) - SerDe.writeString(dos, "close") - socket.close() - socket = null - } catch { - case e: Exception => logError("Exception when closing socket: ", e) - } - } - } while (socket != null) - DotnetBackend.callbackSocketShutdown = true + DotnetBackend.shutdownCallbackClient() } } -object DotnetBackend { - // Channels to callback server. - private[spark] val callbackSockets: BlockingQueue[Socket] = new LinkedBlockingQueue[Socket]() - @volatile private[spark] var callbackPort: Int = 0 +object DotnetBackend extends Logging { + @volatile private[spark] var callbackClient: CallbackClient = null + + private[spark] def setCallbackClient(address: String, port: Int) = synchronized { + if (DotnetBackend.callbackClient == null) { + logInfo(s"Connecting to a callback server at $address:$port") + DotnetBackend.callbackClient = new CallbackClient(address, port) + } else { + throw new Exception("Callback client already set.") + } + } - // flag to denote whether the callback socket is shutdown explicitly - @volatile private[spark] var callbackSocketShutdown: Boolean = false + private[spark] def shutdownCallbackClient(): Unit = synchronized { + if (callbackClient != null) { + callbackClient.shutdown() + callbackClient = null + } + } } diff --git a/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala index f4e9490a0..1cde1d1c5 100644 --- a/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala +++ b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala @@ -7,7 +7,6 @@ package org.apache.spark.api.dotnet import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} -import java.net.Socket import io.netty.channel.{ChannelHandlerContext, SimpleChannelInboundHandler} import org.apache.spark.api.dotnet.SerDe._ @@ -67,32 +66,16 @@ class DotnetBackendHandler(server: DotnetBackend) writeInt(dos, -1) } case "connectCallback" => - val t = readObjectType(dis) - assert(t == 'i') + assert(readObjectType(dis) == 'c') + val address = readString(dis) + assert(readObjectType(dis) == 'i') val port = readInt(dis) - logInfo(s"Connecting to a callback server at port $port") - DotnetBackend.callbackPort = port + DotnetBackend.setCallbackClient(address, port); writeInt(dos, 0) writeType(dos, "void") case "closeCallback" => - // Send close to .NET callback server. 
- logInfo("Requesting to close all call back sockets.") - var socket: Socket = null - do { - socket = DotnetBackend.callbackSockets.poll() - if (socket != null) { - val dataOutputStream = new DataOutputStream(socket.getOutputStream) - SerDe.writeString(dataOutputStream, "close") - try { - socket.close() - socket = null - } catch { - case e: Exception => logError("Exception when closing socket: ", e) - } - } - } while (socket != null) - DotnetBackend.callbackSocketShutdown = true - + logInfo("Requesting to close callback client") + DotnetBackend.shutdownCallbackClient() writeInt(dos, 0) writeType(dos, "void") diff --git a/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala new file mode 100644 index 000000000..c70d16b03 --- /dev/null +++ b/src/scala/microsoft-spark-2.3.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala @@ -0,0 +1,13 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.api.dotnet + +class DotnetException(message: String, cause: Throwable) + extends Exception(message, cause) { + + def this(message: String) = this(message, null) +} diff --git a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala new file mode 100644 index 000000000..0026d78df --- /dev/null +++ b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.api.dotnet + +import java.io.DataOutputStream + +import org.apache.spark.internal.Logging + +import scala.collection.mutable.Queue + +/** + * CallbackClient is used to communicate with the Dotnet CallbackServer. + * The client manages and maintains a pool of open CallbackConnections. + * Any callback request is delegated to a new CallbackConnection or + * unused CallbackConnection. 
+ * @param address The address of the Dotnet CallbackServer + * @param port The port of the Dotnet CallbackServer + */ +class CallbackClient(address: String, port: Int) extends Logging { + private[this] val connectionPool: Queue[CallbackConnection] = Queue[CallbackConnection]() + + private[this] var isShutdown: Boolean = false + + final def send( + callbackId: Int, + writeBody: DataOutputStream => Unit): Unit = + getOrCreateConnection() match { + case Some(connection) => + try { + connection.send(callbackId, writeBody) + } catch { + case e: Exception => + logError(s"Error calling callback [callback id = $callbackId].", e) + connection.close() + throw e + } + case None => throw new Exception("Unable to get or create connection.") + } + + private def getOrCreateConnection(): Option[CallbackConnection] = synchronized { + if (isShutdown) { + logInfo("Cannot get or create connection while client is shutdown.") + return None + } + + if (connectionPool.nonEmpty) { + return Some(connectionPool.dequeue()) + } + + Some(new CallbackConnection(address, port)) + } + + private def addConnection(connection: CallbackConnection): Unit = synchronized { + assert(connection != null) + connectionPool.enqueue(connection) + } + + def shutdown(): Unit = synchronized { + if (isShutdown) { + logInfo("Shutdown called, but already shutdown.") + return + } + + logInfo("Shutting down.") + connectionPool.foreach(_.close) + connectionPool.clear + isShutdown = true + } +} diff --git a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala new file mode 100644 index 000000000..36726181e --- /dev/null +++ b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.api.dotnet + +import java.io.{ByteArrayOutputStream, Closeable, DataInputStream, DataOutputStream} +import java.net.Socket + +import org.apache.spark.internal.Logging + +/** + * CallbackConnection is used to process the callback communication + * between the JVM and Dotnet. It uses a TCP socket to communicate with + * the Dotnet CallbackServer and the socket is expected to be reused. 
+ * @param address The address of the Dotnet CallbackServer + * @param port The port of the Dotnet CallbackServer + */ +class CallbackConnection(address: String, port: Int) extends Logging { + private[this] val socket: Socket = new Socket(address, port) + private[this] val inputStream: DataInputStream = new DataInputStream(socket.getInputStream) + private[this] val outputStream: DataOutputStream = new DataOutputStream(socket.getOutputStream) + + def send( + callbackId: Int, + writeBody: DataOutputStream => Unit): Unit = { + logInfo(s"Calling callback [callback id = $callbackId] ...") + + try { + SerDe.writeInt(outputStream, CallbackFlags.CALLBACK) + SerDe.writeInt(outputStream, callbackId) + + val byteArrayOutputStream = new ByteArrayOutputStream() + writeBody(new DataOutputStream(byteArrayOutputStream)) + SerDe.writeInt(outputStream, byteArrayOutputStream.size) + byteArrayOutputStream.writeTo(outputStream); + } catch { + case e: Exception => { + throw new Exception("Error writing to stream.", e) + } + } + + logInfo(s"Signaling END_OF_STREAM.") + try { + SerDe.writeInt(outputStream, CallbackFlags.END_OF_STREAM) + outputStream.flush() + + val endOfStreamResponse = readFlag(inputStream) + endOfStreamResponse match { + case CallbackFlags.END_OF_STREAM => + logInfo(s"Received END_OF_STREAM signal. Calling callback [callback id = $callbackId] successful.") + case _ => { + throw new Exception(s"Error verifying end of stream. Expected: ${CallbackFlags.END_OF_STREAM}, " + + s"Received: $endOfStreamResponse") + } + } + } catch { + case e: Exception => { + throw new Exception("Error while verifying end of stream.", e) + } + } + } + + def close(): Unit = { + try { + SerDe.writeInt(outputStream, CallbackFlags.CLOSE) + outputStream.flush() + } catch { + case e: Exception => logInfo("Unable to send close to .NET callback server.", e) + } + + close(socket) + close(outputStream) + close(inputStream) + } + + private def close(s: Socket): Unit = { + try { + assert(s != null) + s.close() + } catch { + case e: Exception => logInfo("Unable to close socket.", e) + } + } + + private def close(c: Closeable): Unit = { + try { + assert(c != null) + c.close() + } catch { + case e: Exception => logInfo("Unable to close closeable.", e) + } + } + + private def readFlag(inputStream: DataInputStream): Int = { + val callbackFlag = SerDe.readInt(inputStream) + if (callbackFlag == CallbackFlags.DOTNET_EXCEPTION_THROWN) { + val exceptionMessage = SerDe.readString(inputStream) + throw new DotnetException(exceptionMessage) + } + callbackFlag + } + + private object CallbackFlags { + val CLOSE: Int = -1 + val CALLBACK: Int = -2 + val DOTNET_EXCEPTION_THROWN: Int = -3 + val END_OF_STREAM: Int = -4 + } +} \ No newline at end of file diff --git a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala index 45b3cd5a4..002945bb8 100644 --- a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala +++ b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala @@ -6,9 +6,8 @@ package org.apache.spark.api.dotnet -import java.io.DataOutputStream -import java.net.{InetSocketAddress, Socket} -import java.util.concurrent.{BlockingQueue, LinkedBlockingQueue, TimeUnit} +import java.net.InetSocketAddress +import java.util.concurrent.TimeUnit import io.netty.bootstrap.ServerBootstrap import io.netty.channel.nio.NioEventLoopGroup @@ 
-50,7 +49,6 @@ class DotnetBackend extends Logging { // lengthFieldLength = 4 // lengthAdjustment = 0 // initialBytesToStrip = 4, i.e. strip out the length field itself - // new LengthFieldBasedFrameDecoder(Integer.MAX_VALUE, 0, 4, 0, 4)) new LengthFieldBasedFrameDecoder(Integer.MAX_VALUE, 0, 4, 0, 4)) .addLast("decoder", new ByteArrayDecoder()) .addLast("handler", new DotnetBackendHandler(self)) @@ -81,30 +79,26 @@ class DotnetBackend extends Logging { bootstrap = null // Send close to .NET callback server. - logInfo("Requesting to close all call back sockets") - var socket: Socket = null - do { - socket = DotnetBackend.callbackSockets.poll() - if (socket != null) { - try { - val dos = new DataOutputStream(socket.getOutputStream) - SerDe.writeString(dos, "close") - socket.close() - socket = null - } catch { - case e: Exception => logError("Exception when closing socket: ", e) - } - } - } while (socket != null) - DotnetBackend.callbackSocketShutdown = true + DotnetBackend.shutdownCallbackClient() } } -object DotnetBackend { - // Channels to callback server. - private[spark] val callbackSockets: BlockingQueue[Socket] = new LinkedBlockingQueue[Socket]() - @volatile private[spark] var callbackPort: Int = 0 +object DotnetBackend extends Logging { + @volatile private[spark] var callbackClient: CallbackClient = null + + private[spark] def setCallbackClient(address: String, port: Int) = synchronized { + if (DotnetBackend.callbackClient == null) { + logInfo(s"Connecting to a callback server at $address:$port") + DotnetBackend.callbackClient = new CallbackClient(address, port) + } else { + throw new Exception("Callback client already set.") + } + } - // flag to denote whether the callback socket is shutdown explicitly - @volatile private[spark] var callbackSocketShutdown: Boolean = false + private[spark] def shutdownCallbackClient(): Unit = synchronized { + if (callbackClient != null) { + callbackClient.shutdown() + callbackClient = null + } + } } diff --git a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala index f4e9490a0..1cde1d1c5 100644 --- a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala +++ b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala @@ -7,7 +7,6 @@ package org.apache.spark.api.dotnet import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} -import java.net.Socket import io.netty.channel.{ChannelHandlerContext, SimpleChannelInboundHandler} import org.apache.spark.api.dotnet.SerDe._ @@ -67,32 +66,16 @@ class DotnetBackendHandler(server: DotnetBackend) writeInt(dos, -1) } case "connectCallback" => - val t = readObjectType(dis) - assert(t == 'i') + assert(readObjectType(dis) == 'c') + val address = readString(dis) + assert(readObjectType(dis) == 'i') val port = readInt(dis) - logInfo(s"Connecting to a callback server at port $port") - DotnetBackend.callbackPort = port + DotnetBackend.setCallbackClient(address, port); writeInt(dos, 0) writeType(dos, "void") case "closeCallback" => - // Send close to .NET callback server. 
- logInfo("Requesting to close all call back sockets.") - var socket: Socket = null - do { - socket = DotnetBackend.callbackSockets.poll() - if (socket != null) { - val dataOutputStream = new DataOutputStream(socket.getOutputStream) - SerDe.writeString(dataOutputStream, "close") - try { - socket.close() - socket = null - } catch { - case e: Exception => logError("Exception when closing socket: ", e) - } - } - } while (socket != null) - DotnetBackend.callbackSocketShutdown = true - + logInfo("Requesting to close callback client") + DotnetBackend.shutdownCallbackClient() writeInt(dos, 0) writeType(dos, "void") diff --git a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala new file mode 100644 index 000000000..c70d16b03 --- /dev/null +++ b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala @@ -0,0 +1,13 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.api.dotnet + +class DotnetException(message: String, cause: Throwable) + extends Exception(message, cause) { + + def this(message: String) = this(message, null) +} diff --git a/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala new file mode 100644 index 000000000..c0de9c7bc --- /dev/null +++ b/src/scala/microsoft-spark-2.4.x/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.sql.api.dotnet + +import org.apache.spark.api.dotnet.{CallbackClient, DotnetBackend, SerDe} +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.streaming.DataStreamWriter + +class DotnetForeachBatchFunction(callbackClient: CallbackClient, callbackId: Int) extends Logging { + def call(batchDF: DataFrame, batchId: Long): Unit = + callbackClient.send( + callbackId, + dos => { + SerDe.writeJObj(dos, batchDF) + SerDe.writeLong(dos, batchId) + }) +} + +object DotnetForeachBatchHelper { + def callForeachBatch(dsw: DataStreamWriter[Row], callbackId: Int): Unit = { + val callbackClient = DotnetBackend.callbackClient + if (callbackClient == null) { + throw new Exception("DotnetBackend.callbackClient is null.") + } + + val dotnetForeachFunc = new DotnetForeachBatchFunction(callbackClient, callbackId) + dsw.foreachBatch(dotnetForeachFunc.call _) + } +} diff --git a/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala new file mode 100644 index 000000000..0026d78df --- /dev/null +++ b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/CallbackClient.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. 
+ * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.api.dotnet + +import java.io.DataOutputStream + +import org.apache.spark.internal.Logging + +import scala.collection.mutable.Queue + +/** + * CallbackClient is used to communicate with the Dotnet CallbackServer. + * The client manages and maintains a pool of open CallbackConnections. + * Any callback request is delegated to a new CallbackConnection or + * unused CallbackConnection. + * @param address The address of the Dotnet CallbackServer + * @param port The port of the Dotnet CallbackServer + */ +class CallbackClient(address: String, port: Int) extends Logging { + private[this] val connectionPool: Queue[CallbackConnection] = Queue[CallbackConnection]() + + private[this] var isShutdown: Boolean = false + + final def send( + callbackId: Int, + writeBody: DataOutputStream => Unit): Unit = + getOrCreateConnection() match { + case Some(connection) => + try { + connection.send(callbackId, writeBody) + } catch { + case e: Exception => + logError(s"Error calling callback [callback id = $callbackId].", e) + connection.close() + throw e + } + case None => throw new Exception("Unable to get or create connection.") + } + + private def getOrCreateConnection(): Option[CallbackConnection] = synchronized { + if (isShutdown) { + logInfo("Cannot get or create connection while client is shutdown.") + return None + } + + if (connectionPool.nonEmpty) { + return Some(connectionPool.dequeue()) + } + + Some(new CallbackConnection(address, port)) + } + + private def addConnection(connection: CallbackConnection): Unit = synchronized { + assert(connection != null) + connectionPool.enqueue(connection) + } + + def shutdown(): Unit = synchronized { + if (isShutdown) { + logInfo("Shutdown called, but already shutdown.") + return + } + + logInfo("Shutting down.") + connectionPool.foreach(_.close) + connectionPool.clear + isShutdown = true + } +} diff --git a/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala new file mode 100644 index 000000000..36726181e --- /dev/null +++ b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/CallbackConnection.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.api.dotnet + +import java.io.{ByteArrayOutputStream, Closeable, DataInputStream, DataOutputStream} +import java.net.Socket + +import org.apache.spark.internal.Logging + +/** + * CallbackConnection is used to process the callback communication + * between the JVM and Dotnet. It uses a TCP socket to communicate with + * the Dotnet CallbackServer and the socket is expected to be reused. 
+ * @param address The address of the Dotnet CallbackServer + * @param port The port of the Dotnet CallbackServer + */ +class CallbackConnection(address: String, port: Int) extends Logging { + private[this] val socket: Socket = new Socket(address, port) + private[this] val inputStream: DataInputStream = new DataInputStream(socket.getInputStream) + private[this] val outputStream: DataOutputStream = new DataOutputStream(socket.getOutputStream) + + def send( + callbackId: Int, + writeBody: DataOutputStream => Unit): Unit = { + logInfo(s"Calling callback [callback id = $callbackId] ...") + + try { + SerDe.writeInt(outputStream, CallbackFlags.CALLBACK) + SerDe.writeInt(outputStream, callbackId) + + val byteArrayOutputStream = new ByteArrayOutputStream() + writeBody(new DataOutputStream(byteArrayOutputStream)) + SerDe.writeInt(outputStream, byteArrayOutputStream.size) + byteArrayOutputStream.writeTo(outputStream); + } catch { + case e: Exception => { + throw new Exception("Error writing to stream.", e) + } + } + + logInfo(s"Signaling END_OF_STREAM.") + try { + SerDe.writeInt(outputStream, CallbackFlags.END_OF_STREAM) + outputStream.flush() + + val endOfStreamResponse = readFlag(inputStream) + endOfStreamResponse match { + case CallbackFlags.END_OF_STREAM => + logInfo(s"Received END_OF_STREAM signal. Calling callback [callback id = $callbackId] successful.") + case _ => { + throw new Exception(s"Error verifying end of stream. Expected: ${CallbackFlags.END_OF_STREAM}, " + + s"Received: $endOfStreamResponse") + } + } + } catch { + case e: Exception => { + throw new Exception("Error while verifying end of stream.", e) + } + } + } + + def close(): Unit = { + try { + SerDe.writeInt(outputStream, CallbackFlags.CLOSE) + outputStream.flush() + } catch { + case e: Exception => logInfo("Unable to send close to .NET callback server.", e) + } + + close(socket) + close(outputStream) + close(inputStream) + } + + private def close(s: Socket): Unit = { + try { + assert(s != null) + s.close() + } catch { + case e: Exception => logInfo("Unable to close socket.", e) + } + } + + private def close(c: Closeable): Unit = { + try { + assert(c != null) + c.close() + } catch { + case e: Exception => logInfo("Unable to close closeable.", e) + } + } + + private def readFlag(inputStream: DataInputStream): Int = { + val callbackFlag = SerDe.readInt(inputStream) + if (callbackFlag == CallbackFlags.DOTNET_EXCEPTION_THROWN) { + val exceptionMessage = SerDe.readString(inputStream) + throw new DotnetException(exceptionMessage) + } + callbackFlag + } + + private object CallbackFlags { + val CLOSE: Int = -1 + val CALLBACK: Int = -2 + val DOTNET_EXCEPTION_THROWN: Int = -3 + val END_OF_STREAM: Int = -4 + } +} \ No newline at end of file diff --git a/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala index 45b3cd5a4..002945bb8 100644 --- a/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala +++ b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackend.scala @@ -6,9 +6,8 @@ package org.apache.spark.api.dotnet -import java.io.DataOutputStream -import java.net.{InetSocketAddress, Socket} -import java.util.concurrent.{BlockingQueue, LinkedBlockingQueue, TimeUnit} +import java.net.InetSocketAddress +import java.util.concurrent.TimeUnit import io.netty.bootstrap.ServerBootstrap import io.netty.channel.nio.NioEventLoopGroup @@ 
-50,7 +49,6 @@ class DotnetBackend extends Logging { // lengthFieldLength = 4 // lengthAdjustment = 0 // initialBytesToStrip = 4, i.e. strip out the length field itself - // new LengthFieldBasedFrameDecoder(Integer.MAX_VALUE, 0, 4, 0, 4)) new LengthFieldBasedFrameDecoder(Integer.MAX_VALUE, 0, 4, 0, 4)) .addLast("decoder", new ByteArrayDecoder()) .addLast("handler", new DotnetBackendHandler(self)) @@ -81,30 +79,26 @@ class DotnetBackend extends Logging { bootstrap = null // Send close to .NET callback server. - logInfo("Requesting to close all call back sockets") - var socket: Socket = null - do { - socket = DotnetBackend.callbackSockets.poll() - if (socket != null) { - try { - val dos = new DataOutputStream(socket.getOutputStream) - SerDe.writeString(dos, "close") - socket.close() - socket = null - } catch { - case e: Exception => logError("Exception when closing socket: ", e) - } - } - } while (socket != null) - DotnetBackend.callbackSocketShutdown = true + DotnetBackend.shutdownCallbackClient() } } -object DotnetBackend { - // Channels to callback server. - private[spark] val callbackSockets: BlockingQueue[Socket] = new LinkedBlockingQueue[Socket]() - @volatile private[spark] var callbackPort: Int = 0 +object DotnetBackend extends Logging { + @volatile private[spark] var callbackClient: CallbackClient = null + + private[spark] def setCallbackClient(address: String, port: Int) = synchronized { + if (DotnetBackend.callbackClient == null) { + logInfo(s"Connecting to a callback server at $address:$port") + DotnetBackend.callbackClient = new CallbackClient(address, port) + } else { + throw new Exception("Callback client already set.") + } + } - // flag to denote whether the callback socket is shutdown explicitly - @volatile private[spark] var callbackSocketShutdown: Boolean = false + private[spark] def shutdownCallbackClient(): Unit = synchronized { + if (callbackClient != null) { + callbackClient.shutdown() + callbackClient = null + } + } } diff --git a/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala index f4e9490a0..1cde1d1c5 100644 --- a/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala +++ b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetBackendHandler.scala @@ -7,7 +7,6 @@ package org.apache.spark.api.dotnet import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} -import java.net.Socket import io.netty.channel.{ChannelHandlerContext, SimpleChannelInboundHandler} import org.apache.spark.api.dotnet.SerDe._ @@ -67,32 +66,16 @@ class DotnetBackendHandler(server: DotnetBackend) writeInt(dos, -1) } case "connectCallback" => - val t = readObjectType(dis) - assert(t == 'i') + assert(readObjectType(dis) == 'c') + val address = readString(dis) + assert(readObjectType(dis) == 'i') val port = readInt(dis) - logInfo(s"Connecting to a callback server at port $port") - DotnetBackend.callbackPort = port + DotnetBackend.setCallbackClient(address, port); writeInt(dos, 0) writeType(dos, "void") case "closeCallback" => - // Send close to .NET callback server. 
- logInfo("Requesting to close all call back sockets.") - var socket: Socket = null - do { - socket = DotnetBackend.callbackSockets.poll() - if (socket != null) { - val dataOutputStream = new DataOutputStream(socket.getOutputStream) - SerDe.writeString(dataOutputStream, "close") - try { - socket.close() - socket = null - } catch { - case e: Exception => logError("Exception when closing socket: ", e) - } - } - } while (socket != null) - DotnetBackend.callbackSocketShutdown = true - + logInfo("Requesting to close callback client") + DotnetBackend.shutdownCallbackClient() writeInt(dos, 0) writeType(dos, "void") diff --git a/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala new file mode 100644 index 000000000..c70d16b03 --- /dev/null +++ b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala @@ -0,0 +1,13 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. + */ + +package org.apache.spark.api.dotnet + +class DotnetException(message: String, cause: Throwable) + extends Exception(message, cause) { + + def this(message: String) = this(message, null) +} diff --git a/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala new file mode 100644 index 000000000..c0de9c7bc --- /dev/null +++ b/src/scala/microsoft-spark-3.0.x/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the .NET Foundation under one or more agreements. + * The .NET Foundation licenses this file to you under the MIT license. + * See the LICENSE file in the project root for more information. 
+ */ + +package org.apache.spark.sql.api.dotnet + +import org.apache.spark.api.dotnet.{CallbackClient, DotnetBackend, SerDe} +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.streaming.DataStreamWriter + +class DotnetForeachBatchFunction(callbackClient: CallbackClient, callbackId: Int) extends Logging { + def call(batchDF: DataFrame, batchId: Long): Unit = + callbackClient.send( + callbackId, + dos => { + SerDe.writeJObj(dos, batchDF) + SerDe.writeLong(dos, batchId) + }) +} + +object DotnetForeachBatchHelper { + def callForeachBatch(dsw: DataStreamWriter[Row], callbackId: Int): Unit = { + val callbackClient = DotnetBackend.callbackClient + if (callbackClient == null) { + throw new Exception("DotnetBackend.callbackClient is null.") + } + + val dotnetForeachFunc = new DotnetForeachBatchFunction(callbackClient, callbackId) + dsw.foreachBatch(dotnetForeachFunc.call _) + } +} From c889f39a2827b94ad1ad6503f18836633f5fd00c Mon Sep 17 00:00:00 2001 From: elvaliuliuliu <47404285+elvaliuliuliu@users.noreply.github.com> Date: Tue, 23 Jun 2020 10:58:15 -0700 Subject: [PATCH 14/27] Fix NRE for TimestampType and DateType and support nullable value types (#530) --- .../IpcTests/Sql/SparkSessionTests.cs | 48 +++++++++++++++---- .../Microsoft.Spark/Sql/SparkSession.cs | 45 ++++++++++++++--- .../Microsoft.Spark/Sql/Types/SimpleTypes.cs | 19 ++++---- 3 files changed, 89 insertions(+), 23 deletions(-) diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/SparkSessionTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/SparkSessionTests.cs index c312ddc6c..5a70a6698 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/SparkSessionTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/SparkSessionTests.cs @@ -94,7 +94,7 @@ public void TestCreateDataFrame() // Calling CreateDataFrame(IEnumerable _) without schema { - var data = new List(new string[] { "Alice", "Bob" }); + var data = new string[] { "Alice", "Bob", null }; StructType schema = SchemaWithSingleColumn(new StringType()); DataFrame df = _spark.CreateDataFrame(data); @@ -103,7 +103,16 @@ public void TestCreateDataFrame() // Calling CreateDataFrame(IEnumerable _) without schema { - var data = new List(new int[] { 1, 2 }); + var data = new int[] { 1, 2 }; + StructType schema = SchemaWithSingleColumn(new IntegerType(), false); + + DataFrame df = _spark.CreateDataFrame(data); + ValidateDataFrame(df, data.Select(a => new object[] { a }), schema); + } + + // Calling CreateDataFrame(IEnumerable _) without schema + { + var data = new int?[] { 1, 2, null }; StructType schema = SchemaWithSingleColumn(new IntegerType()); DataFrame df = _spark.CreateDataFrame(data); @@ -112,7 +121,16 @@ public void TestCreateDataFrame() // Calling CreateDataFrame(IEnumerable _) without schema { - var data = new List(new double[] { 1.2, 2.3 }); + var data = new double[] { 1.2, 2.3 }; + StructType schema = SchemaWithSingleColumn(new DoubleType(), false); + + DataFrame df = _spark.CreateDataFrame(data); + ValidateDataFrame(df, data.Select(a => new object[] { a }), schema); + } + + // Calling CreateDataFrame(IEnumerable _) without schema + { + var data = new double?[] { 1.2, 2.3, null }; StructType schema = SchemaWithSingleColumn(new DoubleType()); DataFrame df = _spark.CreateDataFrame(data); @@ -121,19 +139,29 @@ public void TestCreateDataFrame() // Calling CreateDataFrame(IEnumerable _) without schema { - var data = new List(new bool[] { true, false }); + var data = new bool[] { true, 
false }; + StructType schema = SchemaWithSingleColumn(new BooleanType(), false); + + DataFrame df = _spark.CreateDataFrame(data); + ValidateDataFrame(df, data.Select(a => new object[] { a }), schema); + } + + // Calling CreateDataFrame(IEnumerable _) without schema + { + var data = new bool?[] { true, false, null }; StructType schema = SchemaWithSingleColumn(new BooleanType()); DataFrame df = _spark.CreateDataFrame(data); ValidateDataFrame(df, data.Select(a => new object[] { a }), schema); } - + // Calling CreateDataFrame(IEnumerable _) without schema { var data = new Date[] { new Date(2020, 1, 1), - new Date(2020, 1, 2) + new Date(2020, 1, 2), + null }; StructType schema = SchemaWithSingleColumn(new DateType()); @@ -151,7 +179,8 @@ public void TestCreateDataFrameWithTimestamp() var data = new Timestamp[] { new Timestamp(2020, 1, 1, 0, 0, 0, 0), - new Timestamp(2020, 1, 2, 15, 30, 30, 0) + new Timestamp(2020, 1, 2, 15, 30, 30, 0), + null }; StructType schema = SchemaWithSingleColumn(new TimestampType()); @@ -172,8 +201,9 @@ private void ValidateDataFrame( /// Returns a single column schema of the given datatype. /// /// Datatype of the column + /// Indicates if values of the column can be null /// Schema as StructType - private StructType SchemaWithSingleColumn(DataType dataType) => - new StructType(new[] { new StructField("_1", dataType) }); + private StructType SchemaWithSingleColumn(DataType dataType, bool isNullable = true) => + new StructType(new[] { new StructField("_1", dataType, isNullable) }); } } diff --git a/src/csharp/Microsoft.Spark/Sql/SparkSession.cs b/src/csharp/Microsoft.Spark/Sql/SparkSession.cs index fc706081f..f0eab693f 100644 --- a/src/csharp/Microsoft.Spark/Sql/SparkSession.cs +++ b/src/csharp/Microsoft.Spark/Sql/SparkSession.cs @@ -151,9 +151,9 @@ public DataFrame Table(string tableName) => new DataFrame((JvmObjectReference)_jvmObject.Invoke("table", tableName)); /// - /// Creates a from an containing + /// Creates a from an containing /// s using the given schema. - /// It is important to make sure that the structure of every of + /// It is important to make sure that the structure of every of /// the provided matches /// the provided schema. Otherwise, there will be runtime exception. 
/// @@ -172,10 +172,21 @@ public DataFrame CreateDataFrame(IEnumerable data, StructType schema /// of type /// Dataframe object public DataFrame CreateDataFrame(IEnumerable data) => + CreateDataFrame(ToGenericRows(data), SchemaWithSingleColumn(new IntegerType(), false)); + + /// + /// Creates a Dataframe given data as of type + /// + /// + /// of type + /// + /// Dataframe object + public DataFrame CreateDataFrame(IEnumerable data) => CreateDataFrame(ToGenericRows(data), SchemaWithSingleColumn(new IntegerType())); /// - /// Creates a Dataframe given data as of type + /// Creates a Dataframe given data as of type + /// /// /// of type /// Dataframe object @@ -183,11 +194,22 @@ public DataFrame CreateDataFrame(IEnumerable data) => CreateDataFrame(ToGenericRows(data), SchemaWithSingleColumn(new StringType())); /// - /// Creates a Dataframe given data as of type + /// Creates a Dataframe given data as of type + /// /// /// of type /// Dataframe object public DataFrame CreateDataFrame(IEnumerable data) => + CreateDataFrame(ToGenericRows(data), SchemaWithSingleColumn(new DoubleType(), false)); + + /// + /// Creates a Dataframe given data as of type + /// + /// + /// of type + /// + /// Dataframe object + public DataFrame CreateDataFrame(IEnumerable data) => CreateDataFrame(ToGenericRows(data), SchemaWithSingleColumn(new DoubleType())); /// @@ -196,6 +218,16 @@ public DataFrame CreateDataFrame(IEnumerable data) => /// of type /// Dataframe object public DataFrame CreateDataFrame(IEnumerable data) => + CreateDataFrame(ToGenericRows(data), SchemaWithSingleColumn(new BooleanType(), false)); + + /// + /// Creates a Dataframe given data as of type + /// + /// + /// of type + /// + /// Dataframe object + public DataFrame CreateDataFrame(IEnumerable data) => CreateDataFrame(ToGenericRows(data), SchemaWithSingleColumn(new BooleanType())); /// @@ -299,9 +331,10 @@ public UdfRegistration Udf() => /// Returns a single column schema of the given datatype. /// /// Datatype of the column + /// Indicates if values of the column can be null /// Schema as StructType - private StructType SchemaWithSingleColumn(DataType dataType) => - new StructType(new[] { new StructField("_1", dataType) }); + private StructType SchemaWithSingleColumn(DataType dataType, bool isNullable = true) => + new StructType(new[] { new StructField("_1", dataType, isNullable) }); /// /// This method is transforming each element of IEnumerable of type T input into a single diff --git a/src/csharp/Microsoft.Spark/Sql/Types/SimpleTypes.cs b/src/csharp/Microsoft.Spark/Sql/Types/SimpleTypes.cs index 7b9bd7a6f..0638fdb60 100644 --- a/src/csharp/Microsoft.Spark/Sql/Types/SimpleTypes.cs +++ b/src/csharp/Microsoft.Spark/Sql/Types/SimpleTypes.cs @@ -81,6 +81,11 @@ public sealed class DateType : AtomicType /// internal override object FromInternal(object obj) { + if (obj == null) + { + return null; + } + return new Date(new DateTime((int)obj * TimeSpan.TicksPerDay + s_unixTimeEpoch.Ticks)); } } @@ -101,16 +106,14 @@ public sealed class TimestampType : AtomicType /// internal override object FromInternal(object obj) { - // Known issue that if the original type is "long" and its value can be fit into the - // "int", Pickler will serialize the value as int. - if (obj is long val) + if (obj == null) { - val = (long)obj; - } - else - { - val = (int)obj; + return null; } + + // Known issue that if the original type is "long" and its value can be fit into the + // "int", Pickler will serialize the value as int. + long val = (obj is long v) ? 
v : (int)obj; return new Timestamp( new DateTime(val * 10 + DateType.s_unixTimeEpoch.Ticks, DateTimeKind.Utc)); } From 0f576e5f51ce2e7be3a2d38074991e30126dcc18 Mon Sep 17 00:00:00 2001 From: Steve Suh Date: Tue, 23 Jun 2020 10:59:06 -0700 Subject: [PATCH 15/27] Prep 0.12.0 release (#564) --- README.md | 2 +- benchmark/scala/pom.xml | 2 +- docs/release-notes/0.12/release-0.12.md | 115 ++++++++++++++++++++++++ eng/Versions.props | 2 +- src/scala/pom.xml | 2 +- 5 files changed, 119 insertions(+), 4 deletions(-) create mode 100644 docs/release-notes/0.12/release-0.12.md diff --git a/README.md b/README.md index 2d5638a97..5b8647ca0 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ 2.3.* - v0.11.0 + v0.12.0 2.4.0 diff --git a/benchmark/scala/pom.xml b/benchmark/scala/pom.xml index 56b8dc1ea..54608d6ef 100644 --- a/benchmark/scala/pom.xml +++ b/benchmark/scala/pom.xml @@ -3,7 +3,7 @@ 4.0.0 com.microsoft.spark microsoft-spark-benchmark - 0.11.0 + 0.12.0 2019 UTF-8 diff --git a/docs/release-notes/0.12/release-0.12.md b/docs/release-notes/0.12/release-0.12.md new file mode 100644 index 000000000..7000299ea --- /dev/null +++ b/docs/release-notes/0.12/release-0.12.md @@ -0,0 +1,115 @@ +# .NET for Apache Spark 0.12 Release Notes + +### New Features/Improvements and Bug Fixes + +* Expose `DataStreamWriter.ForeachBatch` API ([#549](https://github.com/dotnet/spark/pull/549)) +* Support for [dotnet-interactive](https://github.com/dotnet/interactive) ([#515](https://github.com/dotnet/spark/pull/515)) ([#517](https://github.com/dotnet/spark/pull/517)) ([#554](https://github.com/dotnet/spark/pull/554)) +* Support for [Hyperspace v0.1.0](https://github.com/microsoft/hyperspace) APIs ([#555](https://github.com/dotnet/spark/pull/555)) +* Support for Spark 2.4.6 ([#547](https://github.com/dotnet/spark/pull/547)) +* Bug fixes: + * Udf bug caused by `BroadcastVariablesRegistry` ([#551](https://github.com/dotnet/spark/pull/551)) + * Null checks for `TimestampType` and `DateType` ([#530](https://github.com/dotnet/spark/pull/530)) +* Update `Microsoft.Data.Analysis` to v`0.4.0` ([#528](https://github.com/dotnet/spark/pull/528)) + +### Infrastructure / Documentation / Etc. + +* Improve build pipeline ([#510](https://github.com/dotnet/spark/pull/510)) ([#511](https://github.com/dotnet/spark/pull/511)) ([#512](https://github.com/dotnet/spark/pull/512)) ([#513](https://github.com/dotnet/spark/pull/513)) ([#524](https://github.com/dotnet/spark/pull/524)) +* Update AppName for the C# Spark Examples ([#548](https://github.com/dotnet/spark/pull/548)) +* Update maven links in build documentation ([#558](https://github.com/dotnet/spark/pull/558)) ([#560](https://github.com/dotnet/spark/pull/560)) + +### Breaking Changes + +* None + +### Known Issues + +* Broadcast variables do not work with [dotnet-interactive](https://github.com/dotnet/interactive) ([#561](https://github.com/dotnet/spark/pull/561)) + +### Compatibility + +#### Backward compatibility + +The following table describes the oldest version of the worker that the current version is compatible with, along with new features that are incompatible with the worker. + + + + + + + + + + + + + + + + + + + + + + + +
| Oldest compatible Microsoft.Spark.Worker version | Incompatible features |
| --- | --- |
| v0.9.0 | DataFrame with Grouped Map UDF (#277) |
| | DataFrame with Vector UDF (#277) |
| | Support for Broadcast Variables (#414) |
| | Support for TimestampType (#428) |
+ +#### Forward compatibility + +The following table describes the oldest version of .NET for Apache Spark release that the current worker is compatible with. + + + + + + + + + + + + +
| Oldest compatible .NET for Apache Spark release version |
| --- |
| v0.9.0 |
+ +### Supported Spark Versions + +The following table outlines the supported Spark versions along with the microsoft-spark JAR to use with: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Spark Version | microsoft-spark JAR |
| --- | --- |
| 2.3.* | microsoft-spark-2.3.x-0.12.0.jar |
| 2.4.0 | microsoft-spark-2.4.x-0.12.0.jar |
| 2.4.1 | microsoft-spark-2.4.x-0.12.0.jar |
| 2.4.3 | microsoft-spark-2.4.x-0.12.0.jar |
| 2.4.4 | microsoft-spark-2.4.x-0.12.0.jar |
| 2.4.5 | microsoft-spark-2.4.x-0.12.0.jar |
| 2.4.6 | microsoft-spark-2.4.x-0.12.0.jar |
| 2.4.2 | Not supported |
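For illustration, here is a minimal C# sketch of the nullable-type handling fixed by #530 (whose test changes appear earlier in this patch series). It assumes an existing `SparkSession`; the app name is only a placeholder:

```csharp
using Microsoft.Spark.Sql;

// Placeholder app name; any existing SparkSession works here.
SparkSession spark = SparkSession.Builder().AppName("NullableExample").GetOrCreate();

// Non-nullable input: the single "_1" column is created as not nullable.
DataFrame ints = spark.CreateDataFrame(new int[] { 1, 2 });

// Nullable input: nulls are preserved and the "_1" column is nullable.
DataFrame nullableInts = spark.CreateDataFrame(new int?[] { 1, 2, null });

nullableInts.PrintSchema();
nullableInts.Show();
```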
diff --git a/eng/Versions.props b/eng/Versions.props index dc954bcc5..b1d1c2efd 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -1,7 +1,7 @@ - 0.11.0 + 0.12.0 prerelease $(RestoreSources); diff --git a/src/scala/pom.xml b/src/scala/pom.xml index 34ee5c338..aacc2da49 100644 --- a/src/scala/pom.xml +++ b/src/scala/pom.xml @@ -7,7 +7,7 @@ ${microsoft-spark.version} UTF-8 - 0.11.0 + 0.12.0 From 0879fd1a7a6d18b061dd97281c9b9342660c87ab Mon Sep 17 00:00:00 2001 From: Steve Suh Date: Tue, 23 Jun 2020 14:23:37 -0700 Subject: [PATCH 16/27] Bugfix for Microsoft.Spark.Extensions.DotNet.Interactive duplicate file exception (#565) --- .../AssemblyKernelExtension.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs index 2deff5869..bb30e4957 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs @@ -45,6 +45,8 @@ public Task OnLoadAsync(IKernel kernel) kernelBase.AddMiddleware(async (command, context, next) => { + await next(command, context); + if ((context.HandlingKernel is CSharpKernel kernel) && (command is SubmitCode) && TryGetSparkSession(out SparkSession sparkSession) && @@ -57,8 +59,6 @@ public Task OnLoadAsync(IKernel kernel) sparkSession.SparkContext.AddFile(filePath); } } - - await next(command, context); }); } From 3106e8e87f811979cd0a5a1957ac6bcf177be2ce Mon Sep 17 00:00:00 2001 From: John Baro Date: Sat, 27 Jun 2020 04:42:29 +1000 Subject: [PATCH 17/27] Expose JVM exceptions (#566) * Wrap spark exceptions per #472 * Extra tests for JvmException Code styling per guidelines from review by @imback82 in #541 * Add JvmBridge doc link Co-authored-by: Steve Suh * Fix per code guidelines Co-authored-by: Steve Suh * Fix cref link * Formatting Co-authored-by: Steve Suh * Add license header Co-authored-by: Steve Suh --- .../IpcTests/JvmBridgeTests.cs | 36 +++++++++++++++++++ .../Microsoft.Spark/Interop/Ipc/JvmBridge.cs | 2 +- src/csharp/Microsoft.Spark/JvmException.cs | 19 ++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 src/csharp/Microsoft.Spark.E2ETest/IpcTests/JvmBridgeTests.cs create mode 100644 src/csharp/Microsoft.Spark/JvmException.cs diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/JvmBridgeTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/JvmBridgeTests.cs new file mode 100644 index 000000000..3ae609f5c --- /dev/null +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/JvmBridgeTests.cs @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using Microsoft.Spark.Sql; +using Xunit; + +namespace Microsoft.Spark.E2ETest.IpcTests +{ + [Collection("Spark E2E Tests")] + public class JvmBridgeTests + { + private readonly SparkSession _spark; + + public JvmBridgeTests(SparkFixture fixture) + { + _spark = fixture.Spark; + } + + [Fact] + public void TestInnerJvmException() + { + try + { + _spark.Sql("THROW!!!"); + } + catch (Exception ex) + { + Assert.NotNull(ex.InnerException); + Assert.IsType(ex.InnerException); + Assert.False(string.IsNullOrWhiteSpace(ex.InnerException.Message)); + } + } + } +} diff --git a/src/csharp/Microsoft.Spark/Interop/Ipc/JvmBridge.cs b/src/csharp/Microsoft.Spark/Interop/Ipc/JvmBridge.cs index abfa63b19..231263c74 100644 --- a/src/csharp/Microsoft.Spark/Interop/Ipc/JvmBridge.cs +++ b/src/csharp/Microsoft.Spark/Interop/Ipc/JvmBridge.cs @@ -189,7 +189,7 @@ private object CallJavaMethod( args); _logger.LogError(errorMessage); _logger.LogError(jvmFullStackTrace); - throw new Exception(errorMessage); + throw new Exception(errorMessage, new JvmException(jvmFullStackTrace)); } char typeAsChar = Convert.ToChar(inputStream.ReadByte()); diff --git a/src/csharp/Microsoft.Spark/JvmException.cs b/src/csharp/Microsoft.Spark/JvmException.cs new file mode 100644 index 000000000..75e3aec42 --- /dev/null +++ b/src/csharp/Microsoft.Spark/JvmException.cs @@ -0,0 +1,19 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; + +namespace Microsoft.Spark +{ + /// + /// Contains the message returned from the on an error. + /// + public class JvmException : Exception + { + public JvmException(string message) + : base(message) + { + } + } +} From 886cec05876193d2e7153dd53126deeb88ea22ba Mon Sep 17 00:00:00 2001 From: Steve Suh Date: Sat, 27 Jun 2020 12:24:23 -0700 Subject: [PATCH 18/27] AssemblyLoader should use absolute assembly path when loading assemblies (#570) --- .../AssemblyLoaderTests.cs | 26 +++++++++++++++++++ .../Microsoft.Spark/Utils/AssemblyLoader.cs | 6 ++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs b/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs index f2f0dd30e..c2c5e63ee 100644 --- a/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs +++ b/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs @@ -4,7 +4,11 @@ using System; using System.IO; +using System.Reflection; +using System.Runtime.Loader; +using Microsoft.Spark.Interop.Ipc; using Microsoft.Spark.Utils; +using Moq; using Xunit; namespace Microsoft.Spark.UnitTest @@ -12,6 +16,13 @@ namespace Microsoft.Spark.UnitTest [Collection("Spark Unit Tests")] public class AssemblyLoaderTests { + private readonly Mock _mockJvm; + + public AssemblyLoaderTests(SparkFixture _fixture) + { + _mockJvm = _fixture.MockJvm; + } + [Fact] public void TestAssemblySearchPathResolver() { @@ -45,5 +56,20 @@ public void TestAssemblySearchPathResolver() AssemblySearchPathResolver.AssemblySearchPathsEnvVarName, null); } + + [Fact] + public void TestResolveAssemblyWithRelativePath() + { + _mockJvm.Setup(m => m.CallStaticJavaMethod( + "org.apache.spark.SparkFiles", + "getRootDirectory")) + .Returns("."); + + AssemblyLoader.LoadFromFile = AssemblyLoadContext.Default.LoadFromAssemblyPath; + Assembly expectedAssembly = Assembly.GetExecutingAssembly(); + Assembly actualAssembly = 
AssemblyLoader.ResolveAssembly(expectedAssembly.FullName); + + Assert.Equal(expectedAssembly, actualAssembly); + } } } diff --git a/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs b/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs index 3b9b34f5e..fbc6e199a 100644 --- a/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs +++ b/src/csharp/Microsoft.Spark/Utils/AssemblyLoader.cs @@ -189,12 +189,12 @@ private static bool TryLoadAssembly(string assemblyFileName, ref Assembly assemb { foreach (string searchPath in s_searchPaths.Value) { - string assemblyPath = Path.Combine(searchPath, assemblyFileName); - if (File.Exists(assemblyPath)) + var assemblyFile = new FileInfo(Path.Combine(searchPath, assemblyFileName)); + if (assemblyFile.Exists) { try { - assembly = LoadFromFile(assemblyPath); + assembly = LoadFromFile(assemblyFile.FullName); return true; } catch (Exception ex) when ( From 2f90321c3d7b6aea9485c09f1b6383fce5cf4a3b Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Sat, 27 Jun 2020 12:25:27 -0700 Subject: [PATCH 19/27] Prep 0.12.1 Release (#572) --- README.md | 2 +- benchmark/scala/pom.xml | 2 +- docs/release-notes/0.12.1/release-0.12.1.md | 110 ++++++++++++++++++++ eng/Versions.props | 2 +- src/scala/pom.xml | 2 +- 5 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 docs/release-notes/0.12.1/release-0.12.1.md diff --git a/README.md b/README.md index 5b8647ca0..7aef188eb 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ 2.3.* - v0.12.0 + v0.12.1 2.4.0 diff --git a/benchmark/scala/pom.xml b/benchmark/scala/pom.xml index 54608d6ef..3aa7b195a 100644 --- a/benchmark/scala/pom.xml +++ b/benchmark/scala/pom.xml @@ -3,7 +3,7 @@ 4.0.0 com.microsoft.spark microsoft-spark-benchmark - 0.12.0 + 0.12.1 2019 UTF-8 diff --git a/docs/release-notes/0.12.1/release-0.12.1.md b/docs/release-notes/0.12.1/release-0.12.1.md new file mode 100644 index 000000000..53f4d928a --- /dev/null +++ b/docs/release-notes/0.12.1/release-0.12.1.md @@ -0,0 +1,110 @@ +# .NET for Apache Spark 0.12.1 Release Notes + +### New Features/Improvements + +* Expose `JvmException` to capture JVM error messages separately ([#566](https://github.com/dotnet/spark/pull/566)) + +### Bug Fixes + +* AssemblyLoader should use absolute assembly path when loading assemblies ([570](https://github.com/dotnet/spark/pull/570)) + +### Infrastructure / Documentation / Etc. + +* None + +### Breaking Changes + +* None + +### Known Issues + +* Broadcast variables do not work with [dotnet-interactive](https://github.com/dotnet/interactive) ([#561](https://github.com/dotnet/spark/pull/561)) + +### Compatibility + +#### Backward compatibility + +The following table describes the oldest version of the worker that the current version is compatible with, along with new features that are incompatible with the worker. + + + + + + + + + + + + + + + + + + + + + + + +
| Oldest compatible Microsoft.Spark.Worker version | Incompatible features |
| --- | --- |
| v0.9.0 | DataFrame with Grouped Map UDF (#277) |
| | DataFrame with Vector UDF (#277) |
| | Support for Broadcast Variables (#414) |
| | Support for TimestampType (#428) |
+ +#### Forward compatibility + +The following table describes the oldest version of .NET for Apache Spark release that the current worker is compatible with. + + + + + + + + + + + + +
| Oldest compatible .NET for Apache Spark release version |
| --- |
| v0.9.0 |
+ +### Supported Spark Versions + +The following table outlines the supported Spark versions along with the microsoft-spark JAR to use with: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Spark Version | microsoft-spark JAR |
| --- | --- |
| 2.3.* | microsoft-spark-2.3.x-0.12.1.jar |
| 2.4.0 | microsoft-spark-2.4.x-0.12.1.jar |
| 2.4.1 | microsoft-spark-2.4.x-0.12.1.jar |
| 2.4.3 | microsoft-spark-2.4.x-0.12.1.jar |
| 2.4.4 | microsoft-spark-2.4.x-0.12.1.jar |
| 2.4.5 | microsoft-spark-2.4.x-0.12.1.jar |
| 2.4.6 | microsoft-spark-2.4.x-0.12.1.jar |
| 2.4.2 | Not supported |
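To illustrate the `JvmException` change from #566 (its end-to-end test appears earlier in this patch series), a minimal C# sketch; the invalid SQL string is only a stand-in for any call that fails on the JVM side:

```csharp
using System;
using Microsoft.Spark;
using Microsoft.Spark.Sql;

// Placeholder app name; any existing SparkSession works here.
SparkSession spark = SparkSession.Builder().AppName("JvmExceptionExample").GetOrCreate();

try
{
    // Any JVM-side failure works; invalid SQL is used here as a stand-in.
    spark.Sql("THIS IS NOT VALID SQL").Show();
}
catch (Exception ex) when (ex.InnerException is JvmException jvmException)
{
    // The inner JvmException now carries the JVM error message and stack trace,
    // separate from the .NET-side error message.
    Console.WriteLine(jvmException.Message);
}
```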
diff --git a/eng/Versions.props b/eng/Versions.props index b1d1c2efd..1219678bb 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -1,7 +1,7 @@ - 0.12.0 + 0.12.1 prerelease $(RestoreSources); diff --git a/src/scala/pom.xml b/src/scala/pom.xml index aacc2da49..035221cd4 100644 --- a/src/scala/pom.xml +++ b/src/scala/pom.xml @@ -7,7 +7,7 @@ ${microsoft-spark.version} UTF-8 - 0.12.0 + 0.12.1 From 2a597d8a980b25c85780dcb6dead057fef2ece07 Mon Sep 17 00:00:00 2001 From: Andrew Fogarty Date: Tue, 30 Jun 2020 22:47:51 -0700 Subject: [PATCH 20/27] README.md for Extension directory (#569) --- src/csharp/Extensions/README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 src/csharp/Extensions/README.md diff --git a/src/csharp/Extensions/README.md b/src/csharp/Extensions/README.md new file mode 100644 index 000000000..fa32b6946 --- /dev/null +++ b/src/csharp/Extensions/README.md @@ -0,0 +1,19 @@ +# .Net for Apache Spark Extensions + +## Table of Contents +* [NuGet Packages](#nuget-packages) + +## NuGet Packages + +The following .Net for Apache Spark extensions are available as NuGet packages: + +### First-Party + +* [Microsoft.Spark.Extensions.Azure.Synapse.Analytics](https://www.nuget.org/packages/Microsoft.Spark.Extensions.Azure.Synapse.Analytics/) +* [Microsoft.Spark.Extensions.Delta](https://www.nuget.org/packages/Microsoft.Spark.Extensions.Delta/) +* [Microsoft.Spark.Extensions.DotNet.Interactive](https://www.nuget.org/packages/Microsoft.Spark.Extensions.DotNet.Interactive/) +* [Microsoft.Spark.Extensions.Hyperspace](https://www.nuget.org/packages/Microsoft.Spark.Extensions.Hyperspace/) + +### Third-Party + +* Community-created extensions can be added here. \ No newline at end of file From ec8189097135ba67b995b7762faf4225646f270b Mon Sep 17 00:00:00 2001 From: Ed Elliott Date: Wed, 8 Jul 2020 18:35:13 +0100 Subject: [PATCH 21/27] Introduce a base class for Spark.ML.Features (#574) --- .../Microsoft.Spark/ML/Feature/Bucketizer.cs | 143 +++++------------- .../Microsoft.Spark/ML/Feature/FeatureBase.cs | 73 +++++++++ .../Microsoft.Spark/ML/Feature/HashingTF.cs | 104 ++++--------- src/csharp/Microsoft.Spark/ML/Feature/IDF.cs | 82 +++------- .../Microsoft.Spark/ML/Feature/IDFModel.cs | 77 +++------- .../Microsoft.Spark/ML/Feature/Tokenizer.cs | 68 ++------- .../Microsoft.Spark/ML/Feature/Word2Vec.cs | 42 ++--- .../ML/Feature/Word2VecModel.cs | 33 +--- .../Microsoft.Spark/ML/Util/Identifiable.cs | 15 ++ 9 files changed, 222 insertions(+), 415 deletions(-) create mode 100644 src/csharp/Microsoft.Spark/ML/Feature/FeatureBase.cs create mode 100644 src/csharp/Microsoft.Spark/ML/Util/Identifiable.cs diff --git a/src/csharp/Microsoft.Spark/ML/Feature/Bucketizer.cs b/src/csharp/Microsoft.Spark/ML/Feature/Bucketizer.cs index 924c8b362..8b530f66c 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/Bucketizer.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/Bucketizer.cs @@ -20,19 +20,16 @@ namespace Microsoft.Spark.ML.Feature /// will be thrown. The splits parameter is only used for single column usage, and splitsArray /// is for multiple columns. 
/// - public class Bucketizer : IJvmObjectReferenceProvider + public class Bucketizer : FeatureBase, IJvmObjectReferenceProvider { private static readonly string s_bucketizerClassName = "org.apache.spark.ml.feature.Bucketizer"; - private readonly JvmObjectReference _jvmObject; - /// /// Create a without any parameters /// - public Bucketizer() + public Bucketizer() : base(s_bucketizerClassName) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_bucketizerClassName); } /// @@ -40,14 +37,12 @@ public Bucketizer() /// a unique ID /// /// An immutable unique ID for the object and its derivatives. - public Bucketizer(string uid) + public Bucketizer(string uid) : base(s_bucketizerClassName, uid) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_bucketizerClassName, uid); } - internal Bucketizer(JvmObjectReference jvmObject) + internal Bucketizer(JvmObjectReference jvmObject) : base(jvmObject) { - _jvmObject = jvmObject; } JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; @@ -56,11 +51,8 @@ internal Bucketizer(JvmObjectReference jvmObject) /// Gets the splits that were set using SetSplits /// /// double[], the splits to be used to bucket the input column - public double[] GetSplits() - { - return (double[])_jvmObject.Invoke("getSplits"); - } - + public double[] GetSplits() => (double[])_jvmObject.Invoke("getSplits"); + /// /// Split points for splitting a single column into buckets. To split multiple columns use /// SetSplitsArray. You cannot use both SetSplits and SetSplitsArray at the same time @@ -72,20 +64,15 @@ public double[] GetSplits() /// increasing. Values outside the splits specified will be treated as errors. /// /// New object - public Bucketizer SetSplits(double[] value) - { - return WrapAsBucketizer(_jvmObject.Invoke("setSplits", value)); - } + public Bucketizer SetSplits(double[] value) => + WrapAsBucketizer(_jvmObject.Invoke("setSplits", value)); /// /// Gets the splits that were set by SetSplitsArray /// /// double[][], the splits to be used to bucket the input columns - public double[][] GetSplitsArray() - { - return (double[][])_jvmObject.Invoke("getSplitsArray"); - } - + public double[][] GetSplitsArray() => (double[][])_jvmObject.Invoke("getSplitsArray"); + /// /// Split points fot splitting multiple columns into buckets. To split a single column use /// SetSplits. You cannot use both SetSplits and SetSplitsArray at the same time. @@ -97,41 +84,32 @@ public double[][] GetSplitsArray() /// includes y. The splits should be of length >= 3 and strictly increasing. /// Values outside the splits specified will be treated as errors. /// New object - public Bucketizer SetSplitsArray(double[][] value) - { - return WrapAsBucketizer(_jvmObject.Invoke("setSplitsArray", (object)value)); - } + public Bucketizer SetSplitsArray(double[][] value) => + WrapAsBucketizer(_jvmObject.Invoke("setSplitsArray", (object)value)); /// /// Gets the column that the should read from and convert into /// buckets. 
This would have been set by SetInputCol /// /// string, the input column - public string GetInputCol() - { - return (string)_jvmObject.Invoke("getInputCol"); - } - + public string GetInputCol() => (string)_jvmObject.Invoke("getInputCol"); + /// /// Sets the column that the should read from and convert into /// buckets /// /// The name of the column to as the source of the buckets /// New object - public Bucketizer SetInputCol(string value) - { - return WrapAsBucketizer(_jvmObject.Invoke("setInputCol", value)); - } - + public Bucketizer SetInputCol(string value) => + WrapAsBucketizer(_jvmObject.Invoke("setInputCol", value)); + /// /// Gets the columns that should read from and convert into /// buckets. This is set by SetInputCol /// /// IEnumerable<string>, list of input columns - public IEnumerable GetInputCols() - { - return ((string[])(_jvmObject.Invoke("getInputCols"))).ToList(); - } + public IEnumerable GetInputCols() => + ((string[])(_jvmObject.Invoke("getInputCols"))).ToList(); /// /// Sets the columns that should read from and convert into @@ -142,73 +120,50 @@ public IEnumerable GetInputCols() /// /// List of input columns to use as sources for buckets /// New object - public Bucketizer SetInputCols(IEnumerable value) - { - return WrapAsBucketizer(_jvmObject.Invoke("setInputCols", value)); - } - + public Bucketizer SetInputCols(IEnumerable value) => + WrapAsBucketizer(_jvmObject.Invoke("setInputCols", value)); + /// /// Gets the name of the column the output data will be written to. This is set by /// SetInputCol /// /// string, the output column - public string GetOutputCol() - { - return (string)_jvmObject.Invoke("getOutputCol"); - } - + public string GetOutputCol() => (string)_jvmObject.Invoke("getOutputCol"); + /// /// The will create a new column in the DataFrame, this is the /// name of the new column. /// /// The name of the new column which contains the bucket ID /// New object - public Bucketizer SetOutputCol(string value) - { - return WrapAsBucketizer(_jvmObject.Invoke("setOutputCol", value)); - } + public Bucketizer SetOutputCol(string value) => + WrapAsBucketizer(_jvmObject.Invoke("setOutputCol", value)); /// /// The list of columns that the will create in the DataFrame. /// This is set by SetOutputCols /// /// IEnumerable<string>, list of output columns - public IEnumerable GetOutputCols() - { - return ((string[])_jvmObject.Invoke("getOutputCols")).ToList(); - } - + public IEnumerable GetOutputCols() => + ((string[])_jvmObject.Invoke("getOutputCols")).ToList(); + /// /// The list of columns that the will create in the DataFrame. 
/// /// List of column names which will contain the bucket ID /// New object - public Bucketizer SetOutputCols(List value) - { - return WrapAsBucketizer(_jvmObject.Invoke("setOutputCols", value)); - } - + public Bucketizer SetOutputCols(List value) => + WrapAsBucketizer(_jvmObject.Invoke("setOutputCols", value)); + /// /// Loads the that was previously saved using Save /// /// The path the previous was saved to /// New object - public static Bucketizer Load(string path) - { - return WrapAsBucketizer( + public static Bucketizer Load(string path) => + WrapAsBucketizer( SparkEnvironment.JvmBridge.CallStaticJavaMethod( s_bucketizerClassName,"load", path)); - } - - /// - /// Saves the so that it can be loaded later using Load - /// - /// The path to save the to - /// New object - public Bucketizer Save(string path) - { - return WrapAsBucketizer(_jvmObject.Invoke("save", path)); - } /// /// Executes the and transforms the DataFrame to include the new @@ -218,31 +173,15 @@ public Bucketizer Save(string path) /// /// containing the original data and the new bucketed columns /// - public DataFrame Transform(DataFrame source) - { - return new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source)); - } - - /// - /// The uid that was used to create the . If no UID is passed in - /// when creating the then a random UID is created when the - /// is created. - /// - /// string UID identifying the - public string Uid() - { - return (string)_jvmObject.Invoke("uid"); - } + public DataFrame Transform(DataFrame source) => + new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source)); /// /// How should the handle invalid data, choices are "skip", /// "error" or "keep" /// /// string showing the way Spark will handle invalid data - public string GetHandleInvalid() - { - return (string)_jvmObject.Invoke("getHandleInvalid"); - } + public string GetHandleInvalid() => (string)_jvmObject.Invoke("getHandleInvalid"); /// /// Tells the what to do with invalid data. @@ -251,11 +190,9 @@ public string GetHandleInvalid() /// /// "skip", "error" or "keep" /// New object - public Bucketizer SetHandleInvalid(string value) - { - return WrapAsBucketizer(_jvmObject.Invoke("setHandleInvalid", value.ToString())); - } - + public Bucketizer SetHandleInvalid(string value) => + WrapAsBucketizer(_jvmObject.Invoke("setHandleInvalid", value.ToString())); + private static Bucketizer WrapAsBucketizer(object obj) => new Bucketizer((JvmObjectReference)obj); } diff --git a/src/csharp/Microsoft.Spark/ML/Feature/FeatureBase.cs b/src/csharp/Microsoft.Spark/ML/Feature/FeatureBase.cs new file mode 100644 index 000000000..d47339178 --- /dev/null +++ b/src/csharp/Microsoft.Spark/ML/Feature/FeatureBase.cs @@ -0,0 +1,73 @@ +using System; +using System.Linq; +using System.Reflection; +using Microsoft.Spark.Interop; +using Microsoft.Spark.Interop.Ipc; + +namespace Microsoft.Spark.ML.Feature +{ + /// + /// FeatureBase is to share code amongst all of the ML.Feature objects, there are a few + /// interfaces that the Scala code implements across all of the objects. This should help to + /// write the extra objects faster. + /// + /// + /// The class that implements FeatureBase, this is needed so we can create new objects where + /// spark returns new objects rather than update existing objects. 
+ /// + public class FeatureBase : Identifiable + { + internal readonly JvmObjectReference _jvmObject; + + internal FeatureBase(string className) + : this(SparkEnvironment.JvmBridge.CallConstructor(className)) + { + } + + internal FeatureBase(string className, string uid) + : this(SparkEnvironment.JvmBridge.CallConstructor(className, uid)) + { + } + + internal FeatureBase(JvmObjectReference jvmObject) + { + _jvmObject = jvmObject; + } + + /// + /// Returns the JVM toString value rather than the .NET ToString default + /// + /// JVM toString() value + public override string ToString() => (string)_jvmObject.Invoke("toString"); + + /// + /// The UID that was used to create the object. If no UID is passed in when creating the + /// object then a random UID is created when the object is created. + /// + /// string UID identifying the object + public string Uid() => (string)_jvmObject.Invoke("uid"); + + /// + /// Saves the object so that it can be loaded later using Load. Note that these objects + /// can be shared with Scala by Loading or Saving in Scala. + /// + /// The path to save the object to + /// New object + public T Save(string path) => + WrapAsType((JvmObjectReference)_jvmObject.Invoke("save", path)); + + private T WrapAsType(JvmObjectReference reference) + { + ConstructorInfo constructor = typeof(T) + .GetConstructors(BindingFlags.NonPublic | BindingFlags.Instance) + .Single(c => + { + ParameterInfo[] parameters = c.GetParameters(); + return (parameters.Length == 1) && + (parameters[0].ParameterType == typeof(JvmObjectReference)); + }); + + return (T)constructor.Invoke(new object[] {reference}); + } + } +} diff --git a/src/csharp/Microsoft.Spark/ML/Feature/HashingTF.cs b/src/csharp/Microsoft.Spark/ML/Feature/HashingTF.cs index 50b4fe04a..d4e815d66 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/HashingTF.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/HashingTF.cs @@ -19,34 +19,29 @@ namespace Microsoft.Spark.ML.Feature /// power of two as the numFeatures parameter; otherwise the features will not be mapped evenly /// to the columns. /// - public class HashingTF : IJvmObjectReferenceProvider + public class HashingTF : FeatureBase, IJvmObjectReferenceProvider { private static readonly string s_hashingTfClassName = "org.apache.spark.ml.feature.HashingTF"; - - private readonly JvmObjectReference _jvmObject; - + /// /// Create a without any parameters /// - public HashingTF() + public HashingTF() : base(s_hashingTfClassName) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_hashingTfClassName); } /// /// Create a with a UID that is used to give the /// a unique ID - /// unique identifier /// - public HashingTF(string uid) + /// An immutable unique ID for the object and its derivatives. 
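To show what the new base class buys, here is a hypothetical wrapper written against it. The class name, its JVM counterpart, and the setter are invented for illustration, and the sketch assumes `FeatureBase` is generic over the concrete wrapper type (as its type-parameter documentation above describes) and lives in the same assembly, so `_jvmObject` is accessible.

```csharp
using Microsoft.Spark.Interop.Ipc;

namespace Microsoft.Spark.ML.Feature
{
    // Hypothetical wrapper: only the shape matters. ToString, Uid and Save are
    // inherited from FeatureBase; only the typed members remain to be written.
    public class CountVectorizer : FeatureBase<CountVectorizer>, IJvmObjectReferenceProvider
    {
        private static readonly string s_className =
            "org.apache.spark.ml.feature.CountVectorizer";

        public CountVectorizer() : base(s_className) { }

        public CountVectorizer(string uid) : base(s_className, uid) { }

        internal CountVectorizer(JvmObjectReference jvmObject) : base(jvmObject) { }

        JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject;

        // Typed setter following the same wrap-the-returned-JVM-object pattern
        // used by the classes in this patch.
        public CountVectorizer SetInputCol(string value) =>
            new CountVectorizer((JvmObjectReference)_jvmObject.Invoke("setInputCol", value));
    }
}
```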
+ public HashingTF(string uid) : base(s_hashingTfClassName, uid) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_hashingTfClassName, uid); } - internal HashingTF(JvmObjectReference jvmObject) + internal HashingTF(JvmObjectReference jvmObject) : base(jvmObject) { - _jvmObject = jvmObject; } JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; @@ -56,31 +51,16 @@ internal HashingTF(JvmObjectReference jvmObject) /// /// The path the previous was saved to /// New object - public static HashingTF Load(string path) - { - return WrapAsHashingTF( + public static HashingTF Load(string path) => + WrapAsHashingTF( SparkEnvironment.JvmBridge.CallStaticJavaMethod( s_hashingTfClassName, "load", path)); - } - - /// - /// Saves the so that it can be loaded later using Load - /// - /// The path to save the to - /// New object - public HashingTF Save(string path) - { - return WrapAsHashingTF(_jvmObject.Invoke("save", path)); - } - + /// /// Gets the binary toggle that controls term frequency counts /// /// Flag showing whether the binary toggle is on or off - public bool GetBinary() - { - return (bool)_jvmObject.Invoke("getBinary"); - } + public bool GetBinary() => (bool)_jvmObject.Invoke("getBinary"); /// /// Binary toggle to control term frequency counts. @@ -88,50 +68,38 @@ public bool GetBinary() /// models that model binary events rather than integer counts /// /// binary toggle, default is false - public HashingTF SetBinary(bool value) - { - return WrapAsHashingTF(_jvmObject.Invoke("setBinary", value)); - } - + public HashingTF SetBinary(bool value) => + WrapAsHashingTF(_jvmObject.Invoke("setBinary", value)); + /// /// Gets the column that the should read from /// /// string, the name of the input column - public string GetInputCol() - { - return (string)_jvmObject.Invoke("getInputCol"); - } - + public string GetInputCol() => (string)_jvmObject.Invoke("getInputCol"); + /// /// Sets the column that the should read from /// /// The name of the column to as the source /// New object - public HashingTF SetInputCol(string value) - { - return WrapAsHashingTF(_jvmObject.Invoke("setInputCol", value)); - } + public HashingTF SetInputCol(string value) => + WrapAsHashingTF(_jvmObject.Invoke("setInputCol", value)); /// /// The will create a new column in the , /// this is the name of the new column. /// /// string, the name of the output col - public string GetOutputCol() - { - return (string)_jvmObject.Invoke("getOutputCol"); - } - + public string GetOutputCol() => (string)_jvmObject.Invoke("getOutputCol"); + /// /// The will create a new column in the , /// this is the name of the new column. /// /// The name of the new column /// New object - public HashingTF SetOutputCol(string value) - { - return WrapAsHashingTF(_jvmObject.Invoke("setOutputCol", value)); - } + public HashingTF SetOutputCol(string value) => + WrapAsHashingTF(_jvmObject.Invoke("setOutputCol", value)); /// /// Gets the number of features that should be used. Since a simple modulo is used to @@ -140,11 +108,8 @@ public HashingTF SetOutputCol(string value) /// columns. /// /// The number of features to be used - public int GetNumFeatures() - { - return (int)_jvmObject.Invoke("getNumFeatures"); - } - + public int GetNumFeatures() => (int)_jvmObject.Invoke("getNumFeatures"); + /// /// Sets the number of features that should be used. 
Since a simple modulo is used to /// transform the hash function to a column index, it is advisable to use a power of two as @@ -153,19 +118,8 @@ public int GetNumFeatures() /// /// int /// New object - public HashingTF SetNumFeatures(int value) - { - return WrapAsHashingTF(_jvmObject.Invoke("setNumFeatures", value)); - } - - /// - /// An immutable unique ID for the object and its derivatives. - /// - /// string, unique ID for the object - public string Uid() - { - return (string)_jvmObject.Invoke("uid"); - } + public HashingTF SetNumFeatures(int value) => + WrapAsHashingTF(_jvmObject.Invoke("setNumFeatures", value)); /// /// Executes the and transforms the DataFrame to include the new @@ -173,11 +127,9 @@ public string Uid() /// /// The to add the tokens to /// containing the original data and the tokens - public DataFrame Transform(DataFrame source) - { - return new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source)); - } - + public DataFrame Transform(DataFrame source) => + new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source)); + private static HashingTF WrapAsHashingTF(object obj) => new HashingTF((JvmObjectReference)obj); } diff --git a/src/csharp/Microsoft.Spark/ML/Feature/IDF.cs b/src/csharp/Microsoft.Spark/ML/Feature/IDF.cs index 5c2259aaf..56d2fa59f 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/IDF.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/IDF.cs @@ -17,18 +17,15 @@ namespace Microsoft.Spark.ML.Feature /// of documents (controlled by the variable minDocFreq). For terms that are not in at least /// minDocFreq documents, the IDF is found as 0, resulting in TF-IDFs of 0. /// - public class IDF : IJvmObjectReferenceProvider + public class IDF : FeatureBase, IJvmObjectReferenceProvider { private static readonly string s_IDFClassName = "org.apache.spark.ml.feature.IDF"; - private readonly JvmObjectReference _jvmObject; - /// /// Create a without any parameters /// - public IDF() + public IDF() : base(s_IDFClassName) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_IDFClassName); } /// @@ -36,14 +33,12 @@ public IDF() /// a unique ID /// /// An immutable unique ID for the object and its derivatives. - public IDF(string uid) + public IDF(string uid) : base(s_IDFClassName, uid) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_IDFClassName, uid); } - internal IDF(JvmObjectReference jvmObject) + internal IDF(JvmObjectReference jvmObject) : base(jvmObject) { - _jvmObject = jvmObject; } JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; @@ -52,82 +47,53 @@ internal IDF(JvmObjectReference jvmObject) /// Gets the column that the should read from /// /// string, input column - public string GetInputCol() - { - return (string)(_jvmObject.Invoke("getInputCol")); - } - + public string GetInputCol() => (string)(_jvmObject.Invoke("getInputCol")); + /// /// Sets the column that the should read from /// /// The name of the column to as the source /// New object - public IDF SetInputCol(string value) - { - return WrapAsIDF(_jvmObject.Invoke("setInputCol", value)); - } + public IDF SetInputCol(string value) => WrapAsIDF(_jvmObject.Invoke("setInputCol", value)); /// /// The will create a new column in the DataFrame, this is the /// name of the new column. 
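A short, hedged usage sketch of the HashingTF surface above; the column names, the feature count, and the `tokenized` DataFrame are assumptions.

```csharp
// Sketch only: turn a column of tokens into hashed term-frequency vectors.
HashingTF hashingTF = new HashingTF("myHashingTF")
    .SetInputCol("words")
    .SetOutputCol("rawFeatures")
    .SetNumFeatures(1 << 18);   // a power of two, as the remarks above advise

DataFrame featurized = hashingTF.Transform(tokenized);
featurized.Show();
```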
/// /// string, the output column - public string GetOutputCol() - { - return (string)(_jvmObject.Invoke("getOutputCol")); - } - + public string GetOutputCol() => (string)(_jvmObject.Invoke("getOutputCol")); + /// /// The will create a new column in the DataFrame, this is the /// name of the new column. /// /// The name of the new column /// New object - public IDF SetOutputCol(string value) - { - return WrapAsIDF(_jvmObject.Invoke("setOutputCol", value)); - } + public IDF SetOutputCol(string value) => + WrapAsIDF(_jvmObject.Invoke("setOutputCol", value)); /// /// Minimum of documents in which a term should appear for filtering /// /// int, minimum number of documents in which a term should appear - public int GetMinDocFreq() - { - return (int)_jvmObject.Invoke("getMinDocFreq"); - } - + public int GetMinDocFreq() => (int)_jvmObject.Invoke("getMinDocFreq"); + /// /// Minimum of documents in which a term should appear for filtering /// /// int, the minimum of documents a term should appear in /// New object - public IDF SetMinDocFreq(int value) - { - return WrapAsIDF(_jvmObject.Invoke("setMinDocFreq", value)); - } - + public IDF SetMinDocFreq(int value) => + WrapAsIDF(_jvmObject.Invoke("setMinDocFreq", value)); + /// /// Fits a model to the input data. /// /// The to fit the model to /// New object - public IDFModel Fit(DataFrame source) - { - return new IDFModel((JvmObjectReference)_jvmObject.Invoke("fit", source)); - } + public IDFModel Fit(DataFrame source) => + new IDFModel((JvmObjectReference)_jvmObject.Invoke("fit", source)); - /// - /// The uid that was used to create the . If no UID is passed in - /// when creating the then a random UID is created when the - /// is created. - /// - /// string UID identifying the - public string Uid() - { - return (string)_jvmObject.Invoke("uid"); - } - /// /// Loads the that was previously saved using Save /// @@ -138,17 +104,7 @@ public static IDF Load(string path) return WrapAsIDF( SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_IDFClassName, "load", path)); } - - /// - /// Saves the so that it can be loaded later using Load - /// - /// The path to save the to - /// New object - public IDF Save(string path) - { - return WrapAsIDF(_jvmObject.Invoke("save", path)); - } - + private static IDF WrapAsIDF(object obj) => new IDF((JvmObjectReference)obj); } } diff --git a/src/csharp/Microsoft.Spark/ML/Feature/IDFModel.cs b/src/csharp/Microsoft.Spark/ML/Feature/IDFModel.cs index 4fc8a4f30..31da6e153 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/IDFModel.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/IDFModel.cs @@ -12,19 +12,16 @@ namespace Microsoft.Spark.ML.Feature /// A that converts the input string to lowercase and then splits it by /// white spaces. /// - public class IDFModel : IJvmObjectReferenceProvider + public class IDFModel : FeatureBase, IJvmObjectReferenceProvider { private static readonly string s_IDFModelClassName = "org.apache.spark.ml.feature.IDFModel"; - private readonly JvmObjectReference _jvmObject; - /// /// Create a without any parameters /// - public IDFModel() + public IDFModel() : base(s_IDFModelClassName) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_IDFModelClassName); } /// @@ -32,14 +29,12 @@ public IDFModel() /// a unique ID /// /// An immutable unique ID for the object and its derivatives. 
- public IDFModel(string uid) + public IDFModel(string uid) : base(s_IDFModelClassName, uid) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_IDFModelClassName, uid); } - internal IDFModel(JvmObjectReference jvmObject) + internal IDFModel(JvmObjectReference jvmObject) : base(jvmObject) { - _jvmObject = jvmObject; } JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; @@ -48,32 +43,24 @@ internal IDFModel(JvmObjectReference jvmObject) /// Gets the column that the should read from /// /// string, input column - public string GetInputCol() - { - return (string)(_jvmObject.Invoke("getInputCol")); - } - + public string GetInputCol() => (string)(_jvmObject.Invoke("getInputCol")); + /// /// Sets the column that the should read from and convert into /// buckets /// /// The name of the column to as the source /// New object - public IDFModel SetInputCol(string value) - { - return WrapAsIDFModel(_jvmObject.Invoke("setInputCol", value)); - } + public IDFModel SetInputCol(string value) => + WrapAsIDFModel(_jvmObject.Invoke("setInputCol", value)); /// /// The will create a new column in the , /// this is the name of the new column. /// /// string, the output column - public string GetOutputCol() - { - return (string)(_jvmObject.Invoke("getOutputCol")); - } - + public string GetOutputCol() => (string)(_jvmObject.Invoke("getOutputCol")); + /// /// The will create a new column in the DataFrame, this is the /// name of the new column. @@ -81,42 +68,24 @@ public string GetOutputCol() /// The name of the new column which contains the tokens /// /// New object - public IDFModel SetOutputCol(string value) - { - return WrapAsIDFModel(_jvmObject.Invoke("setOutputCol", value)); - } - + public IDFModel SetOutputCol(string value) => + WrapAsIDFModel(_jvmObject.Invoke("setOutputCol", value)); + /// /// Minimum of documents in which a term should appear for filtering /// /// Minimum number of documents a term should appear - public int GetMinDocFreq() - { - return (int)_jvmObject.Invoke("getMinDocFreq"); - } - + public int GetMinDocFreq() => (int)_jvmObject.Invoke("getMinDocFreq"); + /// /// Executes the and transforms the to /// include the new column or columns with the tokens. /// /// The to add the tokens to /// containing the original data and the tokens - public DataFrame Transform(DataFrame source) - { - return new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source)); - } + public DataFrame Transform(DataFrame source) => + new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source)); - /// - /// The uid that was used to create the . If no UID is passed in - /// when creating the then a random UID is created when the - /// is created. 
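Building on the HashingTF sketch, a hedged example of the estimator/model split: `IDF.Fit` produces an `IDFModel`, which then rescales the hashed counts. The column names and the `featurized` DataFrame carry over from the previous sketch and remain assumptions.

```csharp
// Sketch only: rescale raw term frequencies by inverse document frequency.
IDF idf = new IDF()
    .SetInputCol("rawFeatures")
    .SetOutputCol("features")
    .SetMinDocFreq(2);                       // ignore terms seen in < 2 documents

IDFModel idfModel = idf.Fit(featurized);     // estimator -> fitted model
DataFrame rescaled = idfModel.Transform(featurized);
rescaled.Show();
```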
- /// - /// string UID identifying the - public string Uid() - { - return (string)_jvmObject.Invoke("uid"); - } - /// /// Loads the that was previously saved using Save /// @@ -128,17 +97,7 @@ public static IDFModel Load(string path) SparkEnvironment.JvmBridge.CallStaticJavaMethod( s_IDFModelClassName, "load", path)); } - - /// - /// Saves the so that it can be loaded later using Load - /// - /// The path to save the to - /// New object - public IDFModel Save(string path) - { - return WrapAsIDFModel(_jvmObject.Invoke("save", path)); - } - + private static IDFModel WrapAsIDFModel(object obj) => new IDFModel((JvmObjectReference)obj); } diff --git a/src/csharp/Microsoft.Spark/ML/Feature/Tokenizer.cs b/src/csharp/Microsoft.Spark/ML/Feature/Tokenizer.cs index c411309dc..cf5ad84f7 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/Tokenizer.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/Tokenizer.cs @@ -12,19 +12,16 @@ namespace Microsoft.Spark.ML.Feature /// A that converts the input string to lowercase and then splits it by /// white spaces. /// - public class Tokenizer : IJvmObjectReferenceProvider + public class Tokenizer : FeatureBase, IJvmObjectReferenceProvider { private static readonly string s_tokenizerClassName = "org.apache.spark.ml.feature.Tokenizer"; - private readonly JvmObjectReference _jvmObject; - /// /// Create a without any parameters /// - public Tokenizer() + public Tokenizer() : base(s_tokenizerClassName) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_tokenizerClassName); } /// @@ -32,14 +29,12 @@ public Tokenizer() /// a unique ID /// /// An immutable unique ID for the object and its derivatives. - public Tokenizer(string uid) + public Tokenizer(string uid) : base(s_tokenizerClassName, uid) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_tokenizerClassName, uid); } - internal Tokenizer(JvmObjectReference jvmObject) + internal Tokenizer(JvmObjectReference jvmObject) : base(jvmObject) { - _jvmObject = jvmObject; } JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; @@ -48,42 +43,32 @@ internal Tokenizer(JvmObjectReference jvmObject) /// Gets the column that the should read from /// /// string, input column - public string GetInputCol() - { - return (string)(_jvmObject.Invoke("getInputCol")); - } - + public string GetInputCol() => (string)(_jvmObject.Invoke("getInputCol")); + /// /// Sets the column that the should read from /// /// The name of the column to as the source /// New object - public Tokenizer SetInputCol(string value) - { - return WrapAsTokenizer(_jvmObject.Invoke("setInputCol", value)); - } + public Tokenizer SetInputCol(string value) => + WrapAsTokenizer(_jvmObject.Invoke("setInputCol", value)); /// /// The will create a new column in the DataFrame, this is the /// name of the new column. /// /// string, the output column - public string GetOutputCol() - { - return (string)(_jvmObject.Invoke("getOutputCol")); - } - + public string GetOutputCol() => (string)(_jvmObject.Invoke("getOutputCol")); + /// /// The will create a new column in the DataFrame, this is the /// name of the new column. 
/// /// The name of the new column /// New object - public Tokenizer SetOutputCol(string value) - { - return WrapAsTokenizer(_jvmObject.Invoke("setOutputCol", value)); - } - + public Tokenizer SetOutputCol(string value) => + WrapAsTokenizer(_jvmObject.Invoke("setOutputCol", value)); + /// /// Executes the and transforms the DataFrame to include the new /// column @@ -92,22 +77,9 @@ public Tokenizer SetOutputCol(string value) /// /// New object with the source transformed /// - public DataFrame Transform(DataFrame source) - { - return new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source)); - } + public DataFrame Transform(DataFrame source) => + new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source)); - /// - /// The uid that was used to create the . If no UID is passed in - /// when creating the then a random UID is created when the - /// is created. - /// - /// string UID identifying the - public string Uid() - { - return (string)_jvmObject.Invoke("uid"); - } - /// /// Loads the that was previously saved using Save /// @@ -120,16 +92,6 @@ public static Tokenizer Load(string path) s_tokenizerClassName, "load", path)); } - /// - /// Saves the so that it can be loaded later using Load - /// - /// The path to save the to - /// New object - public Tokenizer Save(string path) - { - return WrapAsTokenizer(_jvmObject.Invoke("save", path)); - } - private static Tokenizer WrapAsTokenizer(object obj) => new Tokenizer((JvmObjectReference)obj); } diff --git a/src/csharp/Microsoft.Spark/ML/Feature/Word2Vec.cs b/src/csharp/Microsoft.Spark/ML/Feature/Word2Vec.cs index 977194c8a..d272b1921 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/Word2Vec.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/Word2Vec.cs @@ -8,36 +8,29 @@ namespace Microsoft.Spark.ML.Feature { - public class Word2Vec : IJvmObjectReferenceProvider + public class Word2Vec : FeatureBase, IJvmObjectReferenceProvider { private static readonly string s_word2VecClassName = "org.apache.spark.ml.feature.Word2Vec"; - - private readonly JvmObjectReference _jvmObject; - + /// - /// Create a without any parameters. Once you have created a - /// you must call , - /// , and . + /// Create a without any parameters /// - public Word2Vec() + public Word2Vec() : base(s_word2VecClassName) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_word2VecClassName); } /// /// Create a with a UID that is used to give the - /// a unique ID. + /// a unique ID /// /// An immutable unique ID for the object and its derivatives. - public Word2Vec(string uid) + public Word2Vec(string uid) : base(s_word2VecClassName, uid) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_word2VecClassName, uid); } - internal Word2Vec(JvmObjectReference jvmObject) + internal Word2Vec(JvmObjectReference jvmObject) : base(jvmObject) { - _jvmObject = jvmObject; } JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; @@ -190,30 +183,13 @@ public Word2VecModel Fit(DataFrame dataFrame) => new Word2VecModel((JvmObjectReference)_jvmObject.Invoke("fit", dataFrame)); /// - /// The uid that was used to create the . If no UID is passed in - /// when creating the then a random UID is created when the - /// is created. - /// - /// string UID identifying the . - public string Uid() => (string)_jvmObject.Invoke("uid"); - - /// - /// Loads the that was previously saved using - /// . + /// Loads the that was previously saved using Save(string). /// /// The path the previous was saved to /// New object, loaded from path. 
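For completeness, the `tokenized` DataFrame assumed in the earlier sketches could be produced with the Tokenizer above; the column names and `df` are illustrative.

```csharp
// Sketch only: lowercase a text column and split it on whitespace.
Tokenizer tokenizer = new Tokenizer()
    .SetInputCol("sentence")
    .SetOutputCol("words");

DataFrame tokenized = tokenizer.Transform(df);
tokenized.Show();
```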
public static Word2Vec Load(string path) => WrapAsWord2Vec( SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_word2VecClassName, "load", path)); - - /// - /// Saves the so that it can be loaded later using - /// . - /// - /// The path to save the to. - /// New object. - public Word2Vec Save(string path) => WrapAsWord2Vec(_jvmObject.Invoke("save", path)); - + private static Word2Vec WrapAsWord2Vec(object obj) => new Word2Vec((JvmObjectReference)obj); } diff --git a/src/csharp/Microsoft.Spark/ML/Feature/Word2VecModel.cs b/src/csharp/Microsoft.Spark/ML/Feature/Word2VecModel.cs index 2d3ca704a..b49223619 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/Word2VecModel.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/Word2VecModel.cs @@ -8,19 +8,16 @@ namespace Microsoft.Spark.ML.Feature { - public class Word2VecModel : IJvmObjectReferenceProvider + public class Word2VecModel : FeatureBase, IJvmObjectReferenceProvider { private static readonly string s_word2VecModelClassName = "org.apache.spark.ml.feature.Word2VecModel"; - - private readonly JvmObjectReference _jvmObject; /// /// Create a without any parameters /// - public Word2VecModel() + public Word2VecModel() : base(s_word2VecModelClassName) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_word2VecModelClassName); } /// @@ -28,14 +25,12 @@ public Word2VecModel() /// a unique ID /// /// An immutable unique ID for the object and its derivatives. - public Word2VecModel(string uid) + public Word2VecModel(string uid) : base(s_word2VecModelClassName, uid) { - _jvmObject = SparkEnvironment.JvmBridge.CallConstructor(s_word2VecModelClassName, uid); } - internal Word2VecModel(JvmObjectReference jvmObject) + internal Word2VecModel(JvmObjectReference jvmObject) : base(jvmObject) { - _jvmObject = jvmObject; } JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; @@ -60,8 +55,7 @@ public DataFrame FindSynonyms(string word, int num) => new DataFrame((JvmObjectReference)_jvmObject.Invoke("findSynonyms", word, num)); /// - /// Loads the that was previously saved using - /// . + /// Loads the that was previously saved using Save(string). /// /// /// The path the previous was saved to @@ -71,23 +65,6 @@ public static Word2VecModel Load(string path) => WrapAsWord2VecModel( SparkEnvironment.JvmBridge.CallStaticJavaMethod( s_word2VecModelClassName, "load", path)); - /// - /// Saves the so that it can be loaded later using - /// . - /// - /// The path to save the to. - /// New object. - public Word2VecModel Save(string path) => - WrapAsWord2VecModel(_jvmObject.Invoke("save", path)); - - /// - /// The UID that was used to create the . If no UID is passed in - /// when creating the then a random UID is created when the - /// is created. - /// - /// string UID identifying the . - public string Uid() => (string)_jvmObject.Invoke("uid"); - private static Word2VecModel WrapAsWord2VecModel(object obj) => new Word2VecModel((JvmObjectReference)obj); } diff --git a/src/csharp/Microsoft.Spark/ML/Util/Identifiable.cs b/src/csharp/Microsoft.Spark/ML/Util/Identifiable.cs new file mode 100644 index 000000000..565b8d63b --- /dev/null +++ b/src/csharp/Microsoft.Spark/ML/Util/Identifiable.cs @@ -0,0 +1,15 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.Spark.ML.Feature +{ + public interface Identifiable + { + /// + /// The UID of the object. 
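A hedged sketch of the Word2Vec pair: only `Fit` and `FindSynonyms` are visible in the hunks above, so the setters used here (`SetInputCol`, `SetOutputCol`, `SetVectorSize`, `SetMinCount`) are assumed to follow the same pattern as the other wrappers, and the values are illustrative.

```csharp
// Sketch only: fit word embeddings over a token column, then query synonyms.
Word2Vec word2Vec = new Word2Vec()
    .SetInputCol("words")
    .SetOutputCol("embedding")
    .SetVectorSize(100)
    .SetMinCount(1);

Word2VecModel model = word2Vec.Fit(tokenized);
DataFrame synonyms = model.FindSynonyms("spark", 5);   // top 5 nearest words
synonyms.Show();
```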
+ /// + /// string UID identifying the object + string Uid(); + } +} From f803aa8c890ec6f530d63a366056e8de0a9e18ac Mon Sep 17 00:00:00 2001 From: Andrew Fogarty Date: Fri, 10 Jul 2020 14:21:42 -0700 Subject: [PATCH 22/27] Run Delta Lake tests against 0.6.1 (#588) --- .../Microsoft.Spark.Extensions.Delta.E2ETest/DeltaFixture.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaFixture.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaFixture.cs index 9c0472485..9ca3851f0 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaFixture.cs +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaFixture.cs @@ -16,7 +16,7 @@ public DeltaFixture() { Environment.SetEnvironmentVariable( SparkFixture.EnvironmentVariableNames.ExtraSparkSubmitArgs, - "--packages io.delta:delta-core_2.11:0.6.0 " + + "--packages io.delta:delta-core_2.11:0.6.1 " + "--conf spark.databricks.delta.snapshotPartitions=2 " + "--conf spark.sql.sources.parallelPartitionDiscovery.parallelism=5"); SparkFixture = new SparkFixture(); From 01433ca28e28c1a9f113e0bc97104f4c52bdffa1 Mon Sep 17 00:00:00 2001 From: Usman Mohammed <38691403+usmanmohammed@users.noreply.github.com> Date: Fri, 24 Jul 2020 20:46:22 +0100 Subject: [PATCH 23/27] Add more DataFrame operations examples (#599) --- .../Sql/Batch/Basic.cs | 19 +++++++++++++++++++ .../Sql/Basic.fs | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs index fe57f7d1b..e09c79e20 100644 --- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs +++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs @@ -108,6 +108,25 @@ public void Run(string[] args) DataFrame joinedDf3 = df.Join(df, df["name"] == df["name"], "outer"); joinedDf3.Show(); + + // Union of two data frames + DataFrame unionDf = df.Union(df); + unionDf.Show(); + + // Add new column to data frame + df.WithColumn("location", Lit("Seattle")).Show(); + + // Rename existing column + df.WithColumnRenamed("name", "fullname").Show(); + + // Filter rows with null age + df.Filter(Col("age").IsNull()).Show(); + + // Fill null values in age column with -1 + df.Na().Fill(-1, new[] { "age" }).Show(); + + // Drop age column + df.Drop(new[] { "age" }).Show(); spark.Stop(); } diff --git a/examples/Microsoft.Spark.FSharp.Examples/Sql/Basic.fs b/examples/Microsoft.Spark.FSharp.Examples/Sql/Basic.fs index 4e503fac9..6af1f81f7 100644 --- a/examples/Microsoft.Spark.FSharp.Examples/Sql/Basic.fs +++ b/examples/Microsoft.Spark.FSharp.Examples/Sql/Basic.fs @@ -78,6 +78,25 @@ type Basic() = let joinedDf3 = df.Join(df, df.["name"].EqualTo(df.["name"]), "outer") joinedDf3.Show() + + // Union of two data frames + let unionDf = df.Union(df) + unionDf.Show() + + // Add new column to data frame + df.WithColumn("location", Functions.Lit("Seattle")).Show() + + // Rename existing column + df.WithColumnRenamed("name", "fullname").Show() + + // Filter rows with null age + df.Filter(df.["age"].IsNull()).Show() + + // Fill null values in age column with -1 + df.Na().Fill(-1L, ["age"]).Show() + + // Drop age column + df.Drop(df.["age"]).Show() spark.Stop() 0 From a75d6531a4bf4757aaef7d5e669f114fb4d75bc1 Mon Sep 17 00:00:00 2001 From: Ed Elliott Date: Fri, 24 Jul 2020 20:47:18 +0100 Subject: [PATCH 24/27] Add Param and methods to Spark.ML (#586) --- 
.ionide/symbolCache.db | Bin 0 -> 28672 bytes .../IpcTests/ML/Feature/BucketizerTests.cs | 14 +++ .../IpcTests/ML/Param/ParamTests.cs | 35 ++++++++ .../Microsoft.Spark/ML/Feature/FeatureBase.cs | 44 +++++++++- src/csharp/Microsoft.Spark/ML/Param/Param.cs | 83 ++++++++++++++++++ 5 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 .ionide/symbolCache.db create mode 100644 src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Param/ParamTests.cs create mode 100644 src/csharp/Microsoft.Spark/ML/Param/Param.cs diff --git a/.ionide/symbolCache.db b/.ionide/symbolCache.db new file mode 100644 index 0000000000000000000000000000000000000000..43e567d6d682d85dd32b3baebb0fdf61f67c1643 GIT binary patch literal 28672 zcmeHPYiuJ|6}A(f3n*{Ao>?UI&RXX3biC7$u_ zru*Uw_|aboAq4!aPz(G7Dp3^)2^9$msX|362qA<70{*l}EvWneB<{TIImtK)+7qdu zu{?L~_%Yvi&pr3fz2}^JGgp@L0vB1UR7NW^3^wa~*(5A|iH8H;*B z&*Jr7uND(C74gzvnY~{#(YNt3Bw$FukbofpLjr~b3<($#FeG3|z>t6;0j&hSxNf$G zdV0*S4h!s^BA3}J-Ki9L<b^F{6=TjCuvK9>U*;l97n^=RUn$l~UNgYJCMRsfNA1?9Bl`LCS%K#Ngzf z;{QO@KD+;){!jcL`9JXd#`A*vLwDKru4~cx6X&wy2aZ|$+xDBbpW9ZfKeZmWylYu) z{ax!q%P(3^n}21#VfwY{Wv%+^=L?ZG(qpDCr$c=8^*P6`^IVl5<5tIVd0~tSzgigM z?z5uk`LPT6Y_-By)&wRae!(ne*4gR?lUBdKT&?7)Y>8Rp+k4w%>fhA!qkaWU!g8j& z9avyP?TH38h17hd$}v3E&T>vpABO?_g&-RIX#2Phe6h%7MQ!I9p4+7FTpy5icQ=}> z549iWIkuWzms8^H1xPCcSS58?UH(Q%WgSo}pSi&1%ghFqw{V?jb6g|G_h<$0h{v%C z-gd4n!}2^=x>Jxh}w>8SGqHC9v|D(^V*GlIoc1w|iaru~I2##}Q_|T1nBeK3-|27DujYN}U*Qnog;yWG zZr^}f59*n?f&gs=trhybzAhapG&7CzWFvl0k6CDOn7FsU92`wI{g3@Pu)FM&(n0byiegJk8tp$;dZ(^ zv=Y$fNsAXqBh!KP@Nsv!7c*PDz?GP*+?q1NVCFOrK}H;XiU+ZH0EsJTZI6;T-JAq- zjuS+DU_#0}R28`M+eFlgwc9Ox&3NKZQZlZ_NMqF!>tue`EenJqeO= z-(k%EWxt9n4P*W<`j}){81sMGVPVYwoyPqCY2B>G{GWCz|I_n-YoKm$sAt63{IKZ@ zCjVW()Axq2+xxcnw&!C{$^DLd+4ZLDu=DedKRby1-}XDUcWqa#zqMYm{LFH`_1mrC zmLIi@o8L1(L;w7AFA|##XrEvtfaXaX7#xVzi>ihYNYj*Mww$X`*YV|QzC^=M?s7b{ zR2E&Ad_Jr7vJDN1UN$i$u~P>{b1*gdEEdg`lZyRFw(CoE4YZL{_BddI=VPNxf)tFtiS1c{kwLRP_Bs2KJFK{M=Zfgp z5MzaS^-RLY01>>A4J9(PJCPk;3|-Gg3h=}8Y*2oI=KR!=4YAJvd^~4-*c;Zwp=d)e zwB3Zp8E>CHASQ8d{J&ySn^K6#J;CrWR!`-^; zqM`|6+mM`(61?aElrp4!0${VlSjO{EDzvau3op=cAg;PpUaK$*T(-!H0bn9Ea6!6~ zfK+Z2jZ}ANN{^JVURgcM@|@U<%-5<_8pe2m6F=O3P0ZtfS{ltsMe8cM8#S4aNHRO7 zP>{8>qXSDzJ6)YVwwmL`gM=7R(3d8$>Y$^w!>jYuqOlTbI-J)^}tn~k4R`# z%gp&{VwO;uNmcVHV%BjK=nOZ4Rh#YB_F$tnC4XE!;#3Ygq;Hbn}0)EjsT`7(7Hm(^n4Shvcww9bHjiGUTkb|JUFBEjjap;AiR+{ zRfi)Sw-Q%wk3G;24hEwfM_e&LA1|CP7zp+@8d-fr1qb>{%Ti8k6mY>C>QgR<x&4FUUSXg<^HJJmg z+yu~gPaO(^pg4n>Feuf`;H>7b0t(b?!3eIh&iXMNlE!-ryH6Vpc*fwWGc*$D;%gTqk&q-Cl6 z&AhPQ$ki?Yc)V6Ocw}&wR=2ebVDZ#{FgVz|M=$;yfW}jQD)4qec<>o;`)ASWbD(2j zKwGjUe(ny9$7ZuaBe${y!12mP!8_>_%6uN&A8%dEKw#jIev4{6q2zTjkIn%KC!$yb zu1H5@5XzU1b*Mvx8ey0CdUjSW00%Hm3Vam6}SWN^JQ4~XCiyMMBOimJ2yEVw`3RFM``6MP+8rh)j(l45I9&m?x{pdOk1GyTW(8~A=7nOb-8>4TyfiALgBG{yP< zfN9y}f7L(ad(C&&`$zA~ooi^`*Tu02K?}xl$2k0 zbAGCma`TY_FI()QQ7l(SGLY<5OiOdr+o8!4+e1*~+h_)dnql67B-dwfHZZUiz zzabvn1HdDMHf^Ll5)zb@WM2U=c;2LBe==J4CfT-n1+TU#`8q*c)H!K{K&_voHgaIsS6#Dmy)4GV_@uWLz6 zCQj|6Ygy;mRq^1efCpaJLU|n1bS;|z2Cr*L98HWhx|R)qg4eYq#uF1dLT%c$kd6vq zg6CvS^dv?aO!Sley-p$ur@`DqgVL}*9Lx0e7KVs&&S^rGFGR!5--h~O1_78cOIj<(HBye+O7eATP3F@vFj0ZfVApl|kbfCFCq0p*~C8rNO| z5O|G}`oPsFL8V8~HcD~+AGF&{{x#oUeXn>w_CEI7Jzw?M-QRS_UEg(`cE0b-IsWD- z*gvut>CcRpApt`Ih6D@=7!vqDl0cTI`}N6#m<}|fmUX}y7k4Wa4`L?jmVGkJ6*QPm zYp1g@6xEwkmnZ@8xJSUAea_QpR|;ZeGEFy$x=)|HN*vD)pBhe0C^NlncK8B;yY%7BE>k_JC|$KEu;nbLo^u3l z$A=f=Oa&b*Noz^uG)L4O_hgO6Lz)T9gtSOo1n!-O7mYD7>XTNG)BbMWD7*yKTnn{p2kcErn^;5QO|^~@dfjb=g`(vO!rQ!^N%?io_SZ? 
zsF<0ra;!kxqpY{^imti@RPfs8^z`%@jfgc_8rKLgN6`#2wD@X-UA_4A%qCGc(UU6~ zuc#A&3DvB(O|fV<3^&xn+pKdkFx9uS8&=()>Y`TKtVBwi1a7T|SN5q>Q|Fbs-R8Wi r5i_V}JR8*1*wjg2b^;fqlb8T7-kvp6;i)FU08O@N<|Vu8nsWLNN`{i9 literal 0 HcmV?d00001 diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs index a075334de..e9193fd0b 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.IO; using Microsoft.Spark.ML.Feature; +using Microsoft.Spark.ML.Feature.Param; using Microsoft.Spark.Sql; using Microsoft.Spark.UnitTest.TestUtils; using Xunit; @@ -58,6 +59,19 @@ public void TestBucketizer() Bucketizer loadedBucketizer = Bucketizer.Load(savePath); Assert.Equal(bucketizer.Uid(), loadedBucketizer.Uid()); } + + Assert.NotEmpty(bucketizer.ExplainParams()); + + Param handleInvalidParam = bucketizer.GetParam("handleInvalid"); + Assert.NotEmpty(handleInvalidParam.Doc); + Assert.NotEmpty(handleInvalidParam.Name); + Assert.Equal(handleInvalidParam.Parent, bucketizer.Uid()); + + Assert.NotEmpty(bucketizer.ExplainParam(handleInvalidParam)); + bucketizer.Set(handleInvalidParam, "keep"); + Assert.Equal("keep", bucketizer.GetHandleInvalid()); + + Assert.Equal("error", bucketizer.Clear(handleInvalidParam).GetHandleInvalid()); } [Fact] diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Param/ParamTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Param/ParamTests.cs new file mode 100644 index 000000000..ecb9166e1 --- /dev/null +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Param/ParamTests.cs @@ -0,0 +1,35 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.Spark.ML.Feature.Param; +using Microsoft.Spark.Sql; +using Xunit; + +namespace Microsoft.Spark.E2ETest.IpcTests.ML.ParamTests +{ + [Collection("Spark E2E Tests")] + public class ParamTests + { + private readonly SparkSession _spark; + + public ParamTests(SparkFixture fixture) + { + _spark = fixture.Spark; + } + + [Fact] + public void Test() + { + const string expectedParent = "parent"; + const string expectedName = "name"; + const string expectedDoc = "doc"; + + var param = new Param(expectedParent, expectedName, expectedDoc); + + Assert.Equal(expectedParent, param.Parent); + Assert.Equal(expectedDoc, param.Doc); + Assert.Equal(expectedName, param.Name); + } + } +} diff --git a/src/csharp/Microsoft.Spark/ML/Feature/FeatureBase.cs b/src/csharp/Microsoft.Spark/ML/Feature/FeatureBase.cs index d47339178..fcc90b43d 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/FeatureBase.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/FeatureBase.cs @@ -56,7 +56,49 @@ internal FeatureBase(JvmObjectReference jvmObject) public T Save(string path) => WrapAsType((JvmObjectReference)_jvmObject.Invoke("save", path)); - private T WrapAsType(JvmObjectReference reference) + /// + /// Clears any value that was previously set for this . The value is + /// reset to the default value. 
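The test above already exercises the new surface; as a plain usage sketch, this is how the generic parameter methods being added to `FeatureBase` compose, mirroring the `handleInvalid` parameter from the test (values illustrative).

```csharp
// Sketch only: drive a Bucketizer through the generic Param API instead of
// its typed setters.
Bucketizer bucketizer = new Bucketizer();

Param handleInvalid = bucketizer.GetParam("handleInvalid");
Console.WriteLine(bucketizer.ExplainParam(handleInvalid)); // doc + current value
Console.WriteLine(bucketizer.ExplainParams());             // all applicable params

bucketizer.Set(handleInvalid, "keep");
Console.WriteLine(bucketizer.GetHandleInvalid());          // "keep"

bucketizer.Clear(handleInvalid);                           // back to the default
Console.WriteLine(bucketizer.GetHandleInvalid());          // "error"
```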
+ /// + /// The to set back to its original value + /// Object reference that was used to clear the + public T Clear(Param.Param param) => + WrapAsType((JvmObjectReference)_jvmObject.Invoke("clear", param)); + + /// + /// Returns a description of how a specific works and is currently set. + /// + /// The to explain + /// Description of the + public string ExplainParam(Param.Param param) => + (string)_jvmObject.Invoke("explainParam", param); + + /// + /// Returns a description of how all of the 's that apply to this object + /// work and how they are currently set. + /// + /// Description of all the applicable 's + public string ExplainParams() => (string)_jvmObject.Invoke("explainParams"); + + /// + /// Retrieves a so that it can be used to set the value of the + /// on the object. + /// + /// The name of the to get. + /// that can be used to set the actual value + public Param.Param GetParam(string paramName) => + new Param.Param((JvmObjectReference)_jvmObject.Invoke("getParam", paramName)); + + /// + /// Sets the value of a specific . + /// + /// to set the value of + /// The value to use + /// The object that contains the newly set + public T Set(Param.Param param, object value) => + WrapAsType((JvmObjectReference)_jvmObject.Invoke("set", param, value)); + + private static T WrapAsType(JvmObjectReference reference) { ConstructorInfo constructor = typeof(T) .GetConstructors(BindingFlags.NonPublic | BindingFlags.Instance) diff --git a/src/csharp/Microsoft.Spark/ML/Param/Param.cs b/src/csharp/Microsoft.Spark/ML/Param/Param.cs new file mode 100644 index 000000000..f524ea012 --- /dev/null +++ b/src/csharp/Microsoft.Spark/ML/Param/Param.cs @@ -0,0 +1,83 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using Microsoft.Spark.Interop; +using Microsoft.Spark.Interop.Ipc; + +namespace Microsoft.Spark.ML.Feature.Param +{ + /// + /// A with self-contained documentation and optionally default value. + /// + /// A references an individual parameter that includes documentation, the + /// name of the parameter and optionally a default value. Params can either be set using the + /// generic methods or by using explicit methods. For example + /// has SetHandleInvalid or you can call + /// GetParam("handleInvalid")and then . Set using the + /// and the value you want to use. + /// + public class Param : IJvmObjectReferenceProvider + { + private static readonly string s_ParamClassName = + "org.apache.spark.ml.param.Param"; + + private readonly JvmObjectReference _jvmObject; + + /// + /// Creates a new instance of a which will be attached to the parent + /// specified. The most likely use case for a is being read from a + /// parent object such as rather than independently + /// The parent object to assign the to + /// The name of this + /// The documentation for this + /// + public Param(Identifiable parent, string name, string doc) + : this(SparkEnvironment.JvmBridge.CallConstructor( + s_ParamClassName, parent.Uid(), name, doc)) + { + } + + /// + /// Creates a new instance of a which will be attached to the parent + /// with the UID specified. 
The most likely use case for a is being + /// read from a parent object such as rather than independently + /// + /// The UID of the parent object to assign the to + /// + /// The name of this + /// The documentation for this + /// + public Param(string parent, string name, string doc) + : this(SparkEnvironment.JvmBridge.CallConstructor(s_ParamClassName, parent, name, doc)) + { + } + + internal Param(JvmObjectReference jvmObject) + { + _jvmObject = jvmObject; + } + + JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; + + /// + /// The description of what the does and how it works including any + /// defaults and the current value + /// + /// A description of how the works + public string Doc => (string)_jvmObject.Invoke("doc"); + + /// + /// The name of the + /// + /// The name of the + public string Name => (string)_jvmObject.Invoke("name"); + + /// + /// The object that contains the + /// + /// The UID of the parent oject that this belongs to + public string Parent => (string)_jvmObject.Invoke("parent"); + } +} From 7a6dd0c5700988e3f634b27fe38380d72e2f93fd Mon Sep 17 00:00:00 2001 From: Usman Mohammed <38691403+usmanmohammed@users.noreply.github.com> Date: Sat, 25 Jul 2020 04:25:07 +0100 Subject: [PATCH 25/27] Update ubuntu instructions for running the example app (#603) --- docs/building/ubuntu-instructions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/building/ubuntu-instructions.md b/docs/building/ubuntu-instructions.md index 0e3dbdf40..b259768e5 100644 --- a/docs/building/ubuntu-instructions.md +++ b/docs/building/ubuntu-instructions.md @@ -185,7 +185,7 @@ Once you build the samples, you can use `spark-submit` to submit your .NET Core --class org.apache.spark.deploy.dotnet.DotnetRunner \ --master local \ ~/dotnet.spark/src/scala/microsoft-spark-/target/microsoft-spark-.jar \ - Microsoft.Spark.CSharp.Examples Sql.Batch.Basic $SPARK_HOME/examples/src/main/resources/people.json + ./Microsoft.Spark.CSharp.Examples Sql.Batch.Basic $SPARK_HOME/examples/src/main/resources/people.json ``` - **[Microsoft.Spark.Examples.Sql.Streaming.StructuredNetworkWordCount](../../examples/Microsoft.Spark.CSharp.Examples/Sql/Streaming/StructuredNetworkWordCount.cs)** ```bash @@ -193,7 +193,7 @@ Once you build the samples, you can use `spark-submit` to submit your .NET Core --class org.apache.spark.deploy.dotnet.DotnetRunner \ --master local \ ~/dotnet.spark/src/scala/microsoft-spark-/target/microsoft-spark-.jar \ - Microsoft.Spark.CSharp.Examples Sql.Streaming.StructuredNetworkWordCount localhost 9999 + ./Microsoft.Spark.CSharp.Examples Sql.Streaming.StructuredNetworkWordCount localhost 9999 ``` - **[Microsoft.Spark.Examples.Sql.Streaming.StructuredKafkaWordCount (maven accessible)](../../examples/Microsoft.Spark.CSharp.Examples/Sql/Streaming/StructuredKafkaWordCount.cs)** ```bash @@ -202,7 +202,7 @@ Once you build the samples, you can use `spark-submit` to submit your .NET Core --class org.apache.spark.deploy.dotnet.DotnetRunner \ --master local \ ~/dotnet.spark/src/scala/microsoft-spark-/target/microsoft-spark-.jar \ - Microsoft.Spark.CSharp.Examples Sql.Streaming.StructuredKafkaWordCount localhost:9092 subscribe test + ./Microsoft.Spark.CSharp.Examples Sql.Streaming.StructuredKafkaWordCount localhost:9092 subscribe test ``` - **[Microsoft.Spark.Examples.Sql.Streaming.StructuredKafkaWordCount (jars provided)](../../examples/Microsoft.Spark.CSharp.Examples/Sql/Streaming/StructuredKafkaWordCount.cs)** ```bash @@ -211,7 +211,7 @@ Once you build the 
samples, you can use `spark-submit` to submit your .NET Core --class org.apache.spark.deploy.dotnet.DotnetRunner \ --master local \ ~/dotnet.spark/src/scala/microsoft-spark-/target/microsoft-spark-.jar \ - Microsoft.Spark.CSharp.Examples Sql.Streaming.StructuredKafkaWordCount localhost:9092 subscribe test + ./Microsoft.Spark.CSharp.Examples Sql.Streaming.StructuredKafkaWordCount localhost:9092 subscribe test ``` Feel this experience is complicated? Help us by taking up [Simplify User Experience for Running an App](https://github.com/dotnet/spark/issues/6) From 606235beda67657bf14529326e29f983f7683de7 Mon Sep 17 00:00:00 2001 From: Ed Elliott Date: Sat, 1 Aug 2020 08:39:01 +0100 Subject: [PATCH 26/27] Update .gitignore to include .ionide folder (#609) --- .gitignore | 3 +++ .ionide/symbolCache.db | Bin 28672 -> 0 bytes 2 files changed, 3 insertions(+) delete mode 100644 .ionide/symbolCache.db diff --git a/.gitignore b/.gitignore index 251cfa7e2..faada9c8a 100644 --- a/.gitignore +++ b/.gitignore @@ -367,3 +367,6 @@ hs_err_pid* # The target folder contains the output of building **/target/** + +# F# vs code +.ionide/ diff --git a/.ionide/symbolCache.db b/.ionide/symbolCache.db deleted file mode 100644 index 43e567d6d682d85dd32b3baebb0fdf61f67c1643..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 28672 zcmeHPYiuJ|6}A(f3n*{Ao>?UI&RXX3biC7$u_ zru*Uw_|aboAq4!aPz(G7Dp3^)2^9$msX|362qA<70{*l}EvWneB<{TIImtK)+7qdu zu{?L~_%Yvi&pr3fz2}^JGgp@L0vB1UR7NW^3^wa~*(5A|iH8H;*B z&*Jr7uND(C74gzvnY~{#(YNt3Bw$FukbofpLjr~b3<($#FeG3|z>t6;0j&hSxNf$G zdV0*S4h!s^BA3}J-Ki9L<b^F{6=TjCuvK9>U*;l97n^=RUn$l~UNgYJCMRsfNA1?9Bl`LCS%K#Ngzf z;{QO@KD+;){!jcL`9JXd#`A*vLwDKru4~cx6X&wy2aZ|$+xDBbpW9ZfKeZmWylYu) z{ax!q%P(3^n}21#VfwY{Wv%+^=L?ZG(qpDCr$c=8^*P6`^IVl5<5tIVd0~tSzgigM z?z5uk`LPT6Y_-By)&wRae!(ne*4gR?lUBdKT&?7)Y>8Rp+k4w%>fhA!qkaWU!g8j& z9avyP?TH38h17hd$}v3E&T>vpABO?_g&-RIX#2Phe6h%7MQ!I9p4+7FTpy5icQ=}> z549iWIkuWzms8^H1xPCcSS58?UH(Q%WgSo}pSi&1%ghFqw{V?jb6g|G_h<$0h{v%C z-gd4n!}2^=x>Jxh}w>8SGqHC9v|D(^V*GlIoc1w|iaru~I2##}Q_|T1nBeK3-|27DujYN}U*Qnog;yWG zZr^}f59*n?f&gs=trhybzAhapG&7CzWFvl0k6CDOn7FsU92`wI{g3@Pu)FM&(n0byiegJk8tp$;dZ(^ zv=Y$fNsAXqBh!KP@Nsv!7c*PDz?GP*+?q1NVCFOrK}H;XiU+ZH0EsJTZI6;T-JAq- zjuS+DU_#0}R28`M+eFlgwc9Ox&3NKZQZlZ_NMqF!>tue`EenJqeO= z-(k%EWxt9n4P*W<`j}){81sMGVPVYwoyPqCY2B>G{GWCz|I_n-YoKm$sAt63{IKZ@ zCjVW()Axq2+xxcnw&!C{$^DLd+4ZLDu=DedKRby1-}XDUcWqa#zqMYm{LFH`_1mrC zmLIi@o8L1(L;w7AFA|##XrEvtfaXaX7#xVzi>ihYNYj*Mww$X`*YV|QzC^=M?s7b{ zR2E&Ad_Jr7vJDN1UN$i$u~P>{b1*gdEEdg`lZyRFw(CoE4YZL{_BddI=VPNxf)tFtiS1c{kwLRP_Bs2KJFK{M=Zfgp z5MzaS^-RLY01>>A4J9(PJCPk;3|-Gg3h=}8Y*2oI=KR!=4YAJvd^~4-*c;Zwp=d)e zwB3Zp8E>CHASQ8d{J&ySn^K6#J;CrWR!`-^; zqM`|6+mM`(61?aElrp4!0${VlSjO{EDzvau3op=cAg;PpUaK$*T(-!H0bn9Ea6!6~ zfK+Z2jZ}ANN{^JVURgcM@|@U<%-5<_8pe2m6F=O3P0ZtfS{ltsMe8cM8#S4aNHRO7 zP>{8>qXSDzJ6)YVwwmL`gM=7R(3d8$>Y$^w!>jYuqOlTbI-J)^}tn~k4R`# z%gp&{VwO;uNmcVHV%BjK=nOZ4Rh#YB_F$tnC4XE!;#3Ygq;Hbn}0)EjsT`7(7Hm(^n4Shvcww9bHjiGUTkb|JUFBEjjap;AiR+{ zRfi)Sw-Q%wk3G;24hEwfM_e&LA1|CP7zp+@8d-fr1qb>{%Ti8k6mY>C>QgR<x&4FUUSXg<^HJJmg z+yu~gPaO(^pg4n>Feuf`;H>7b0t(b?!3eIh&iXMNlE!-ryH6Vpc*fwWGc*$D;%gTqk&q-Cl6 z&AhPQ$ki?Yc)V6Ocw}&wR=2ebVDZ#{FgVz|M=$;yfW}jQD)4qec<>o;`)ASWbD(2j zKwGjUe(ny9$7ZuaBe${y!12mP!8_>_%6uN&A8%dEKw#jIev4{6q2zTjkIn%KC!$yb zu1H5@5XzU1b*Mvx8ey0CdUjSW00%Hm3Vam6}SWN^JQ4~XCiyMMBOimJ2yEVw`3RFM``6MP+8rh)j(l45I9&m?x{pdOk1GyTW(8~A=7nOb-8>4TyfiALgBG{yP< zfN9y}f7L(ad(C&&`$zA~ooi^`*Tu02K?}xl$2k0 zbAGCma`TY_FI()QQ7l(SGLY<5OiOdr+o8!4+e1*~+h_)dnql67B-dwfHZZUiz 
zzabvn1HdDMHf^Ll5)zb@WM2U=c;2LBe==J4CfT-n1+TU#`8q*c)H!K{K&_voHgaIsS6#Dmy)4GV_@uWLz6 zCQj|6Ygy;mRq^1efCpaJLU|n1bS;|z2Cr*L98HWhx|R)qg4eYq#uF1dLT%c$kd6vq zg6CvS^dv?aO!Sley-p$ur@`DqgVL}*9Lx0e7KVs&&S^rGFGR!5--h~O1_78cOIj<(HBye+O7eATP3F@vFj0ZfVApl|kbfCFCq0p*~C8rNO| z5O|G}`oPsFL8V8~HcD~+AGF&{{x#oUeXn>w_CEI7Jzw?M-QRS_UEg(`cE0b-IsWD- z*gvut>CcRpApt`Ih6D@=7!vqDl0cTI`}N6#m<}|fmUX}y7k4Wa4`L?jmVGkJ6*QPm zYp1g@6xEwkmnZ@8xJSUAea_QpR|;ZeGEFy$x=)|HN*vD)pBhe0C^NlncK8B;yY%7BE>k_JC|$KEu;nbLo^u3l z$A=f=Oa&b*Noz^uG)L4O_hgO6Lz)T9gtSOo1n!-O7mYD7>XTNG)BbMWD7*yKTnn{p2kcErn^;5QO|^~@dfjb=g`(vO!rQ!^N%?io_SZ? zsF<0ra;!kxqpY{^imti@RPfs8^z`%@jfgc_8rKLgN6`#2wD@X-UA_4A%qCGc(UU6~ zuc#A&3DvB(O|fV<3^&xn+pKdkFx9uS8&=()>Y`TKtVBwi1a7T|SN5q>Q|Fbs-R8Wi r5i_V}JR8*1*wjg2b^;fqlb8T7-kvp6;i)FU08O@N<|Vu8nsWLNN`{i9 From 61cfeb574627bd5d8dc4e9615a9af74937f0fdda Mon Sep 17 00:00:00 2001 From: bolcman Date: Tue, 4 Aug 2020 23:42:24 -0400 Subject: [PATCH 27/27] udf-broadcast-variable-concurrency-fix (#612) --- .../Processor/BroadcastVariableProcessor.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/csharp/Microsoft.Spark.Worker/Processor/BroadcastVariableProcessor.cs b/src/csharp/Microsoft.Spark.Worker/Processor/BroadcastVariableProcessor.cs index 41c817d02..bf8f48ed8 100644 --- a/src/csharp/Microsoft.Spark.Worker/Processor/BroadcastVariableProcessor.cs +++ b/src/csharp/Microsoft.Spark.Worker/Processor/BroadcastVariableProcessor.cs @@ -54,7 +54,8 @@ internal BroadcastVariables Process(Stream stream) else { string path = SerDe.ReadString(stream); - using FileStream fStream = File.Open(path, FileMode.Open, FileAccess.Read); + using FileStream fStream = + File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Read); object value = formatter.Deserialize(fStream); BroadcastRegistry.Add(bid, value); }
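A self-contained illustration (outside the Spark codebase) of why the `FileShare.Read` argument in the fix above matters: the three-argument `File.Open` overload defaults to `FileShare.None`, so a second thread deserializing the same broadcast file would fail while the first handle was still open. Paths and payload here are made up for the demo.

```csharp
using System;
using System.IO;

class FileShareDemo
{
    static void Main()
    {
        string path = Path.GetTempFileName();
        File.WriteAllText(path, "broadcast payload");

        // Opened the way the fixed worker does: read access, other readers allowed.
        using FileStream first =
            File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Read);

        // Succeeds only because the first handle granted FileShare.Read.
        // With File.Open(path, FileMode.Open, FileAccess.Read) — which uses
        // FileShare.None — this second open would throw an IOException.
        using FileStream second =
            File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Read);

        Console.WriteLine("Both handles are open concurrently.");
    }
}
```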