diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index b0b3a1020e618f..987034cae61e02 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -24,6 +24,5 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
liblttng-ust-dev \
libssl-dev \
libkrb5-dev \
- zlib1g-dev \
ninja-build \
tzdata
diff --git a/.devcontainer/android/Dockerfile b/.devcontainer/android/Dockerfile
index 092e291fc6290d..bdbc7d68f258cb 100644
--- a/.devcontainer/android/Dockerfile
+++ b/.devcontainer/android/Dockerfile
@@ -21,9 +21,7 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
liblttng-ust-dev \
libssl-dev \
libkrb5-dev \
- zlib1g-dev \
ninja-build \
- zlib1g-dev \
ninja-build \
openjdk-17-jdk \
pulseaudio
diff --git a/.devcontainer/wasm-multiThreaded/Dockerfile b/.devcontainer/wasm-multiThreaded/Dockerfile
index 70fc2380fdd098..ed0ee4f35f5d73 100644
--- a/.devcontainer/wasm-multiThreaded/Dockerfile
+++ b/.devcontainer/wasm-multiThreaded/Dockerfile
@@ -24,7 +24,6 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
liblttng-ust-dev \
libssl-dev \
libkrb5-dev \
- zlib1g-dev \
ninja-build
SHELL ["/bin/bash", "-c"]
diff --git a/.devcontainer/wasm/Dockerfile b/.devcontainer/wasm/Dockerfile
index 63335387f3cd1e..3950e19fcf1de4 100644
--- a/.devcontainer/wasm/Dockerfile
+++ b/.devcontainer/wasm/Dockerfile
@@ -23,7 +23,6 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
liblttng-ust-dev \
libssl-dev \
libkrb5-dev \
- zlib1g-dev \
ninja-build
SHELL ["/bin/bash", "-c"]
diff --git a/.github/workflows/check-no-merge-label.yml b/.github/workflows/check-no-merge-label.yml
new file mode 100644
index 00000000000000..1c01c2f7324175
--- /dev/null
+++ b/.github/workflows/check-no-merge-label.yml
@@ -0,0 +1,25 @@
+name: check-no-merge-label
+
+permissions:
+ pull-requests: read
+
+on:
+ pull_request:
+ types: [opened, edited, reopened, labeled, unlabeled, synchronize]
+ branches:
+ - 'main'
+ - 'release/**'
+
+jobs:
+ check-labels:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check 'NO-MERGE' label
+ run: |
+ echo "Merging permission is disabled when the 'NO-MERGE' label is applied."
+ if [ "${{ contains(github.event.pull_request.labels.*.name, 'NO-MERGE') }}" = "false" ]; then
+ exit 0
+ else
+ echo "::error:: The 'NO-MERGE' label was applied to the PR. Merging is disabled."
+ exit 1
+ fi
diff --git a/.github/workflows/check-service-labels.yml b/.github/workflows/check-service-labels.yml
index 5261cc165ee128..2d85e4d278a393 100644
--- a/.github/workflows/check-service-labels.yml
+++ b/.github/workflows/check-service-labels.yml
@@ -15,7 +15,7 @@ jobs:
steps:
- name: Check 'Servicing-approved' label
run: |
- echo "Merging permission is enabled for servicing PRs when the `Servicing-approved` label is applied."
+ echo "Merging permission is enabled for servicing PRs when the 'Servicing-approved' label is applied."
if [ "${{ contains(github.event.pull_request.labels.*.name, 'Servicing-approved') }}" = "true" ]; then
exit 0
else
diff --git a/Directory.Build.props b/Directory.Build.props
index 7b78bd6efc0f6b..5eeb3ed7f86822 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -63,7 +63,7 @@
- src/mono/msbuild/apple/build/AppleBuild.targets
- src/installer/pkg/sfx/bundle/shared-framework-distribution-template-x64.xml
- src/installer/pkg/sfx/bundle/shared-framework-distribution-template-arm64.xml
- - src/tasks/AotCompilerTask/MonoAOTCompiler.props
+ - src/mono/msbuild/common/MonoAOTCompiler.props
- src/tasks/AppleAppBuilder/Xcode.cs
- src/tasks/MobileBuildTasks/Apple/AppleProject.cs
- https://github.com/dotnet/sdk repo > src/Installer/redist-installer/targets/GeneratePKG.targets
diff --git a/eng/SourceBuildPrebuiltBaseline.xml b/eng/SourceBuildPrebuiltBaseline.xml
index 4e5b0e471c1565..69be84119be4d9 100644
--- a/eng/SourceBuildPrebuiltBaseline.xml
+++ b/eng/SourceBuildPrebuiltBaseline.xml
@@ -16,6 +16,7 @@
+
-
+
https://github.com/dotnet/cecil
- 9e8bd520939ddfee686261267a1646c1b113d9e1
+ 2d5c8fb9aa8bd4c7fc085a73520061075c601655
@@ -77,146 +77,146 @@
-
+
https://github.com/dotnet/source-build-externals
- 591e522d15c8c9ffad7c7c1df1ae6a3d392717b4
+ 0c377e9585d2aeae504ff1d6529ccb1abef36172
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
https://github.com/dotnet/llvm-project
@@ -302,39 +302,39 @@
https://github.com/dotnet/llvm-project
317598aea216019b6164f599859c738f69595c60
-
+
https://github.com/dotnet/runtime
- 5c6d1b3f7b63a3150ce6c737aeb4af03b3cce621
+ 29013d8ae50f5bc35427a9155234ccebfa5e227c
-
+
https://github.com/dotnet/runtime
- 5c6d1b3f7b63a3150ce6c737aeb4af03b3cce621
+ 29013d8ae50f5bc35427a9155234ccebfa5e227c
-
+
https://github.com/dotnet/runtime
- 5c6d1b3f7b63a3150ce6c737aeb4af03b3cce621
+ 29013d8ae50f5bc35427a9155234ccebfa5e227c
-
+
https://github.com/dotnet/runtime
- 5c6d1b3f7b63a3150ce6c737aeb4af03b3cce621
+ 29013d8ae50f5bc35427a9155234ccebfa5e227c
-
+
https://github.com/dotnet/runtime
- 5c6d1b3f7b63a3150ce6c737aeb4af03b3cce621
+ 29013d8ae50f5bc35427a9155234ccebfa5e227c
-
+
https://github.com/dotnet/runtime
- 5c6d1b3f7b63a3150ce6c737aeb4af03b3cce621
+ 29013d8ae50f5bc35427a9155234ccebfa5e227c
-
+
https://github.com/dotnet/runtime
- 5c6d1b3f7b63a3150ce6c737aeb4af03b3cce621
+ 29013d8ae50f5bc35427a9155234ccebfa5e227c
-
+
https://github.com/dotnet/runtime
- 5c6d1b3f7b63a3150ce6c737aeb4af03b3cce621
+ 29013d8ae50f5bc35427a9155234ccebfa5e227c
https://github.com/dotnet/xharness
@@ -348,9 +348,9 @@
https://github.com/dotnet/xharness
3119edb6d70fb252e6128b0c7e45d3fc2f49f249
-
+
https://github.com/dotnet/arcade
- e7cb34898a1b610eb2a22591a2178da6f1fb7e3c
+ 3bb46f96cc988a80a414f45394f8a9ce54b47d3b
https://dev.azure.com/dnceng/internal/_git/dotnet-optimization
@@ -368,25 +368,25 @@
https://dev.azure.com/dnceng/internal/_git/dotnet-optimization
93bf80f30db2e15a7d62c22ff80fecf3518519b1
-
+
https://github.com/dotnet/hotreload-utils
- 7d2f352486b2e39a7829fc7fefa7d6cf825deff5
+ a8ba820e852306e5098dce560629cd98e0eb8a4a
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
-
+
https://github.com/dotnet/roslyn
- 557c46c532788c16881dbe1b9bd3d938c2ed22e0
+ 250065a15ef94895ef87f3e09b313b987375b5b1
-
+
https://github.com/dotnet/roslyn
- 557c46c532788c16881dbe1b9bd3d938c2ed22e0
+ 250065a15ef94895ef87f3e09b313b987375b5b1
-
+
https://github.com/dotnet/roslyn
- 557c46c532788c16881dbe1b9bd3d938c2ed22e0
+ 250065a15ef94895ef87f3e09b313b987375b5b1
https://github.com/dotnet/roslyn-analyzers
@@ -397,20 +397,14 @@
5ed336762c6260a83ece35cd1f6749251452bad0
-
+
https://github.com/dotnet/roslyn
- 557c46c532788c16881dbe1b9bd3d938c2ed22e0
+ 250065a15ef94895ef87f3e09b313b987375b5b1
-
+
https://github.com/dotnet/sdk
- 13330d5ded0b2b2bcd6459d6a410aa6220b11040
-
-
-
- https://github.com/dotnet/sdk
- 13330d5ded0b2b2bcd6459d6a410aa6220b11040
-
+ 23e2ba847d79562b972dbf54eca3f87c3044d925
https://dev.azure.com/dnceng/internal/_git/dotnet-optimization
@@ -470,9 +464,9 @@
https://github.com/dotnet/node
703264f70f553a06adfb330378c96f56b7583273
-
+
https://github.com/dotnet/runtime-assets
- 6082ed1bb2cfd2d394cdc0ec613c88f3754041f7
+ fc476e8f2d685eb7cadf6342393a0af2708f4dbf
diff --git a/eng/Versions.props b/eng/Versions.props
index 31865578e84bd8..861bd235583e0c 100644
--- a/eng/Versions.props
+++ b/eng/Versions.props
@@ -44,9 +44,9 @@
Any tools that contribute to the design-time experience should use the MicrosoftCodeAnalysisVersion_LatestVS property above to ensure
they do not break the local dev experience.
-->
- 4.14.0-1.25077.5
- 4.14.0-1.25077.5
- 4.14.0-1.25077.5
+ 4.14.0-2.25079.2
+ 4.14.0-2.25079.2
+ 4.14.0-2.25079.2
- 10.0.100-alpha.1.24622.2
+ 10.0.100-alpha.1.25077.2
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 2.9.2-beta.25058.4
- 10.0.0-beta.25058.4
- 2.9.2-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
- 10.0.0-beta.25058.4
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 2.9.2-beta.25079.2
+ 10.0.0-beta.25079.2
+ 2.9.2-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
+ 10.0.0-beta.25079.2
1.4.0
6.0.0-preview.1.102
- 10.0.0-alpha.1.25057.24
+ 10.0.0-alpha.1.25068.1
6.0.0
- 10.0.0-alpha.1.25057.24
+ 10.0.0-alpha.1.25068.1
6.0.0
6.0.0
@@ -123,46 +123,46 @@
5.0.0
4.6.0
4.6.0
- 10.0.0-alpha.1.25057.24
- 10.0.0-alpha.1.25057.24
+ 10.0.0-alpha.1.25068.1
+ 10.0.0-alpha.1.25068.1
6.0.0
5.0.0
5.0.0
5.0.0
7.0.0
- 10.0.0-alpha.1.25057.24
+ 10.0.0-alpha.1.25068.1
6.1.0
7.0.0
4.6.0
4.5.0
- 10.0.0-alpha.1.25057.24
+ 10.0.0-alpha.1.25068.1
8.0.0
8.0.4
8.0.0
8.0.0
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
- 10.0.0-beta.25060.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
+ 10.0.0-beta.25070.2
10.0.0-prerelease.24610.1
10.0.0-prerelease.24610.1
10.0.0-prerelease.24610.1
- 10.0.0-alpha.0.24627.1
+ 10.0.0-alpha.0.25070.1
1.0.0-prerelease.25067.2
1.0.0-prerelease.25067.2
@@ -219,9 +219,9 @@
9.0.0-preview-20241010.1
- 0.11.5-alpha.24627.1
+ 0.11.5-alpha.25069.2
- 10.0.0-alpha.1.24627.1
+ 10.0.0-preview.2.25074.1
2.4.3
9.0.0-alpha.1.24167.3
diff --git a/eng/common/internal/Tools.csproj b/eng/common/internal/Tools.csproj
index 32f79dfb3402c0..feaa6d20812d8f 100644
--- a/eng/common/internal/Tools.csproj
+++ b/eng/common/internal/Tools.csproj
@@ -15,16 +15,6 @@
-
-
-
- https://devdiv.pkgs.visualstudio.com/_packaging/dotnet-core-internal-tooling/nuget/v3/index.json;
-
-
- $(RestoreSources);
- https://devdiv.pkgs.visualstudio.com/_packaging/VS/nuget/v3/index.json;
-
-
diff --git a/eng/common/tools.ps1 b/eng/common/tools.ps1
index 04b02f4fd3cc86..80f9130b15087c 100644
--- a/eng/common/tools.ps1
+++ b/eng/common/tools.ps1
@@ -42,7 +42,7 @@
[bool]$useInstalledDotNetCli = if (Test-Path variable:useInstalledDotNetCli) { $useInstalledDotNetCli } else { $true }
# Enable repos to use a particular version of the on-line dotnet-install scripts.
-# default URL: https://dotnet.microsoft.com/download/dotnet/scripts/v1/dotnet-install.ps1
+# default URL: https://builds.dotnet.microsoft.com/dotnet/scripts/v1/dotnet-install.ps1
[string]$dotnetInstallScriptVersion = if (Test-Path variable:dotnetInstallScriptVersion) { $dotnetInstallScriptVersion } else { 'v1' }
# True to use global NuGet cache instead of restoring packages to repository-local directory.
@@ -262,7 +262,7 @@ function GetDotNetInstallScript([string] $dotnetRoot) {
if (!(Test-Path $installScript)) {
Create-Directory $dotnetRoot
$ProgressPreference = 'SilentlyContinue' # Don't display the console progress UI - it's a huge perf hit
- $uri = "https://dotnet.microsoft.com/download/dotnet/scripts/$dotnetInstallScriptVersion/dotnet-install.ps1"
+ $uri = "https://builds.dotnet.microsoft.com/dotnet/scripts/v1/dotnet-install.ps1"
Retry({
Write-Host "GET $uri"
diff --git a/eng/common/tools.sh b/eng/common/tools.sh
index 40485a0f59de16..df203b5178421d 100755
--- a/eng/common/tools.sh
+++ b/eng/common/tools.sh
@@ -54,7 +54,7 @@ warn_as_error=${warn_as_error:-true}
use_installed_dotnet_cli=${use_installed_dotnet_cli:-true}
# Enable repos to use a particular version of the on-line dotnet-install scripts.
-# default URL: https://dotnet.microsoft.com/download/dotnet/scripts/v1/dotnet-install.sh
+# default URL: https://builds.dotnet.microsoft.com/dotnet/scripts/v1/dotnet-install.sh
dotnetInstallScriptVersion=${dotnetInstallScriptVersion:-'v1'}
# True to use global NuGet cache instead of restoring packages to repository-local directory.
@@ -295,7 +295,7 @@ function with_retries {
function GetDotNetInstallScript {
local root=$1
local install_script="$root/dotnet-install.sh"
- local install_script_url="https://dotnet.microsoft.com/download/dotnet/scripts/$dotnetInstallScriptVersion/dotnet-install.sh"
+ local install_script_url="https://builds.dotnet.microsoft.com/dotnet/scripts/v1/dotnet-install.sh"
if [[ ! -a "$install_script" ]]; then
mkdir -p "$root"
diff --git a/eng/pipelines/common/xplat-setup.yml b/eng/pipelines/common/xplat-setup.yml
index 8bedc3fbce652c..fda4c66b4a791f 100644
--- a/eng/pipelines/common/xplat-setup.yml
+++ b/eng/pipelines/common/xplat-setup.yml
@@ -71,6 +71,8 @@ jobs:
value: zip
- name: tarCompression
value: ''
+ - name: exeExt
+ value: '.exe'
- name: scriptExt
value: '.cmd'
- name: dir
@@ -91,6 +93,8 @@ jobs:
value: tar
- name: tarCompression
value: gz
+ - name: exeExt
+ value: ''
- name: scriptExt
value: '.sh'
- name: dir
diff --git a/eng/pipelines/coreclr/templates/crossgen2-comparison-build-job.yml b/eng/pipelines/coreclr/templates/crossgen2-comparison-build-job.yml
index bf598beec35d35..01f9ed4cfb5309 100644
--- a/eng/pipelines/coreclr/templates/crossgen2-comparison-build-job.yml
+++ b/eng/pipelines/coreclr/templates/crossgen2-comparison-build-job.yml
@@ -69,10 +69,7 @@ jobs:
- name: target_crossgen2_os
value: osx
- name: crossgen2location
- value: $(productDirectory)$(dir)$(targetFlavor)$(dir)crossgen2$(dir)crossgen2.dll
- - ${{ if ne(parameters.archType, 'x64') }}:
- - name: crossgen2location
- value: $(productDirectory)$(dir)$(targetFlavor)$(dir)x64$(dir)crossgen2$(dir)crossgen2.dll
+ value: $(binDirectory)$(dir)crossgen2_inbuild$(dir)$(archType)$(dir)$(buildConfigUpper)$(dir)crossgen2.dll
- name: librariesProductDllDir
value: $(Build.SourcesDirectory)$(dir)artifacts$(dir)bin$(dir)runtime$(dir)net10.0-$(osGroup)$(osSubgroup)-$(buildConfig)-$(archType)
diff --git a/global.json b/global.json
index 20e56cc5204710..41f301cd8a0f98 100644
--- a/global.json
+++ b/global.json
@@ -1,18 +1,18 @@
{
"sdk": {
- "version": "10.0.100-alpha.1.24610.7",
+ "version": "10.0.100-alpha.1.25077.2",
"allowPrerelease": true,
"rollForward": "major"
},
"tools": {
- "dotnet": "10.0.100-alpha.1.24610.7"
+ "dotnet": "10.0.100-alpha.1.25077.2"
},
"msbuild-sdks": {
- "Microsoft.DotNet.Arcade.Sdk": "10.0.0-beta.25058.4",
- "Microsoft.DotNet.Helix.Sdk": "10.0.0-beta.25058.4",
- "Microsoft.DotNet.SharedFramework.Sdk": "10.0.0-beta.25058.4",
+ "Microsoft.DotNet.Arcade.Sdk": "10.0.0-beta.25079.2",
+ "Microsoft.DotNet.Helix.Sdk": "10.0.0-beta.25079.2",
+ "Microsoft.DotNet.SharedFramework.Sdk": "10.0.0-beta.25079.2",
"Microsoft.Build.NoTargets": "3.7.0",
"Microsoft.Build.Traversal": "3.4.0",
- "Microsoft.NET.Sdk.IL": "10.0.0-alpha.1.25057.24"
+ "Microsoft.NET.Sdk.IL": "10.0.0-alpha.1.25068.1"
}
}
diff --git a/src/coreclr/gc/unix/config.gc.h.in b/src/coreclr/gc/unix/config.gc.h.in
index 01cb767798fbcd..dfc38aea6b8470 100644
--- a/src/coreclr/gc/unix/config.gc.h.in
+++ b/src/coreclr/gc/unix/config.gc.h.in
@@ -6,6 +6,7 @@
#cmakedefine01 HAVE_SYS_TIME_H
#cmakedefine01 HAVE_SYS_MMAN_H
+#cmakedefine01 HAVE_SYS_MEMBARRIER_H
#cmakedefine01 HAVE_PTHREAD_THREADID_NP
#cmakedefine01 HAVE_PTHREAD_GETTHREADID_NP
#cmakedefine01 HAVE_VM_FLAGS_SUPERPAGE_SIZE_ANY
diff --git a/src/coreclr/gc/unix/configure.cmake b/src/coreclr/gc/unix/configure.cmake
index c3b301f58938f0..8d33b81a32f727 100644
--- a/src/coreclr/gc/unix/configure.cmake
+++ b/src/coreclr/gc/unix/configure.cmake
@@ -11,6 +11,7 @@ include(CheckLibraryExists)
check_include_files(sys/time.h HAVE_SYS_TIME_H)
check_include_files(sys/mman.h HAVE_SYS_MMAN_H)
check_include_files(pthread_np.h HAVE_PTHREAD_NP_H)
+check_include_files(sys/membarrier.h HAVE_SYS_MEMBARRIER_H)
check_function_exists(vm_allocate HAVE_VM_ALLOCATE)
check_function_exists(sysctlbyname HAVE_SYSCTLBYNAME)
diff --git a/src/coreclr/gc/unix/gcenv.unix.cpp b/src/coreclr/gc/unix/gcenv.unix.cpp
index 37ce5943a20ff8..43588c66eb015a 100644
--- a/src/coreclr/gc/unix/gcenv.unix.cpp
+++ b/src/coreclr/gc/unix/gcenv.unix.cpp
@@ -29,6 +29,14 @@
#include
#endif
+#ifdef __linux__
+#include <sys/syscall.h>
+#include <linux/membarrier.h>
+#define membarrier(...) syscall(__NR_membarrier, __VA_ARGS__)
+#elif HAVE_SYS_MEMBARRIER_H
+#include <sys/membarrier.h>
+#endif
+
#include
#undef min
@@ -94,10 +102,6 @@ extern "C"
#include
#endif // __HAIKU__
-#ifdef __linux__
-#include <sys/syscall.h> // __NR_membarrier
-#endif
-
#if HAVE_PTHREAD_NP_H
#include <pthread_np.h>
#endif
@@ -132,29 +136,9 @@ typedef cpuset_t cpu_set_t;
// The cached total number of CPUs that can be used in the OS.
static uint32_t g_totalCpuCount = 0;
-//
-// Helper membarrier function
-//
-#ifdef __NR_membarrier
-# define membarrier(...) syscall(__NR_membarrier, __VA_ARGS__)
-#else
-# define membarrier(...) -ENOSYS
-#endif
-
-enum membarrier_cmd
-{
- MEMBARRIER_CMD_QUERY = 0,
- MEMBARRIER_CMD_GLOBAL = (1 << 0),
- MEMBARRIER_CMD_GLOBAL_EXPEDITED = (1 << 1),
- MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED = (1 << 2),
- MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3),
- MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4),
- MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 5),
- MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 6)
-};
-
bool CanFlushUsingMembarrier()
{
+#if defined(__linux__) || HAVE_SYS_MEMBARRIER_H
#ifdef TARGET_ANDROID
// Avoid calling membarrier on older Android versions where membarrier
@@ -169,15 +153,16 @@ bool CanFlushUsingMembarrier()
// Starting with Linux kernel 4.14, process memory barriers can be generated
// using MEMBARRIER_CMD_PRIVATE_EXPEDITED.
- int mask = membarrier(MEMBARRIER_CMD_QUERY, 0);
+ int mask = membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
if (mask >= 0 &&
mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED &&
// Register intent to use the private expedited command.
- membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0) == 0)
+ membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0) == 0)
{
return true;
}
+#endif
return false;
}
@@ -423,12 +408,15 @@ bool GCToOSInterface::CanGetCurrentProcessorNumber()
// Flush write buffers of processors that are executing threads of the current process
void GCToOSInterface::FlushProcessWriteBuffers()
{
+#if defined(__linux__) || HAVE_SYS_MEMBARRIER_H
if (s_flushUsingMemBarrier)
{
- int status = membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
+ int status = membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0);
assert(status == 0 && "Failed to flush using membarrier");
}
- else if (g_helperPage != 0)
+ else
+#endif
+ if (g_helperPage != 0)
{
int status = pthread_mutex_lock(&g_flushProcessWriteBuffersMutex);
assert(status == 0 && "Failed to lock the flushProcessWriteBuffersMutex lock");
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 9c215b0ec69918..8811349b8cdbb3 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -3490,13 +3490,15 @@ void CodeGen::genCodeForNegNot(GenTree* tree)
GenTree* operand = tree->gtGetOp1();
// The src must be a register.
- if (tree->OperIs(GT_NEG) && operand->isContained())
+ if (tree->OperIs(GT_NEG, GT_NOT) && operand->isContained())
{
genTreeOps oper = operand->OperGet();
switch (oper)
{
case GT_MUL:
{
+ assert(tree->OperIs(GT_NEG));
+
ins = INS_mneg;
GenTree* op1 = tree->gtGetOp1();
GenTree* a = op1->gtGetOp1();
@@ -3510,7 +3512,7 @@ void CodeGen::genCodeForNegNot(GenTree* tree)
case GT_RSH:
case GT_RSZ:
{
- assert(ins == INS_neg || ins == INS_negs);
+ assert(ins == INS_neg || ins == INS_negs || ins == INS_mvn);
assert(operand->gtGetOp2()->IsCnsIntOrI());
assert(operand->gtGetOp2()->isContained());
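The arm64 change above extends contained-shift handling from NEG to NOT: a GT_NOT whose operand is a contained shift folds into a single mvn with a shifted register source. A hypothetical emitter call producing such a form (register choices arbitrary, assuming the arm64 emitter's shifted-register emitIns_R_R_I overload):

// mvn w10, w11, LSL #2   ; computes ~(w11 << 2) in one instruction
theEmitter->emitIns_R_R_I(INS_mvn, EA_4BYTE, REG_R10, REG_R11, 2, INS_OPTS_LSL);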
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 4f9b2c4f0184b2..a86a2ad5b36969 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -402,12 +402,13 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size,
else
{
// For section constant, the immediate will be relocatable
- GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm DEBUGARG(targetHandle) DEBUGARG(gtFlags));
+ GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm,
+ INS_OPTS_NONE DEBUGARG(targetHandle) DEBUGARG(gtFlags));
}
}
else
{
- GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm DEBUGARG(targetHandle) DEBUGARG(gtFlags));
+ GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm, INS_OPTS_NONE DEBUGARG(targetHandle) DEBUGARG(gtFlags));
}
}
regSet.verifyRegUsed(reg);
@@ -738,12 +739,10 @@ void CodeGen::genCodeForNegNot(GenTree* tree)
{
GenTree* operand = tree->gtGetOp1();
assert(operand->isUsedFromReg());
- regNumber operandReg = genConsumeReg(operand);
+ regNumber operandReg = genConsumeReg(operand);
+ instruction ins = genGetInsForOper(tree->OperGet(), targetType);
- inst_Mov(targetType, targetReg, operandReg, /* canSkip */ true);
-
- instruction ins = genGetInsForOper(tree->OperGet(), targetType);
- inst_RV(ins, targetReg, targetType);
+ GetEmitter()->emitIns_BASE_R_R(ins, emitActualTypeSize(tree), targetReg, operandReg);
}
genProduceReg(tree);
@@ -1063,6 +1062,8 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
GenTree* op1 = treeNode->gtGetOp1();
GenTree* op2 = treeNode->gtGetOp2();
+ bool eligibleForNDD = false;
+
// Commutative operations can mark op1 as contained or reg-optional to generate "op reg, memop/immed"
if (!op1->isUsedFromReg())
{
@@ -1158,31 +1159,57 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
// reg3 = reg3 op reg2
else
{
- var_types op1Type = op1->TypeGet();
- inst_Mov(op1Type, targetReg, op1reg, /* canSkip */ false);
- regSet.verifyRegUsed(targetReg);
- gcInfo.gcMarkRegPtrVal(targetReg, op1Type);
- dst = treeNode;
- src = op2;
+ // when reg3 != reg1 && reg3 != reg2, and NDD is available, we can use APX-EVEX.ND to optimize the codegen.
+ eligibleForNDD = emit->DoJitUseApxNDD(ins);
+ if (!eligibleForNDD)
+ {
+ var_types op1Type = op1->TypeGet();
+ inst_Mov(op1Type, targetReg, op1reg, /* canSkip */ false);
+ regSet.verifyRegUsed(targetReg);
+ gcInfo.gcMarkRegPtrVal(targetReg, op1Type);
+ dst = treeNode;
+ src = op2;
+ }
+ else
+ {
+ dst = op1;
+ src = op2;
+ }
}
+ // We can assume all the floating-point instructions were handled and returned above.
+ assert(!varTypeIsFloating(treeNode));
+
// try to use an inc or dec
- if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
+ if (oper == GT_ADD && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
{
if (src->IsIntegralConst(1))
{
- emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
+ emit->emitIns_BASE_R_R(INS_inc, emitTypeSize(treeNode), targetReg, dst->GetRegNum());
genProduceReg(treeNode);
return;
}
else if (src->IsIntegralConst(-1))
{
- emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
+ emit->emitIns_BASE_R_R(INS_dec, emitTypeSize(treeNode), targetReg, dst->GetRegNum());
genProduceReg(treeNode);
return;
}
}
- regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
+
+ regNumber r = REG_NA;
+ if (eligibleForNDD)
+ {
+ // operands should already be formatted above
+ assert(dst->isUsedFromReg());
+ assert(op1reg != targetReg);
+ assert(op2reg != targetReg);
+ r = emit->emitIns_BASE_R_R_RM(ins, emitTypeSize(treeNode), targetReg, treeNode, dst, src);
+ }
+ else
+ {
+ r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
+ }
noway_assert(r == targetReg);
if (treeNode->gtOverflowEx())
@@ -1295,10 +1322,7 @@ void CodeGen::genCodeForMul(GenTreeOp* treeNode)
}
assert(regOp->isUsedFromReg());
- // Setup targetReg when neither of the source operands was a matching register
- inst_Mov(targetType, mulTargetReg, regOp->GetRegNum(), /* canSkip */ true);
-
- emit->emitInsBinary(ins, size, treeNode, rmOp);
+ emit->emitIns_BASE_R_R_RM(ins, size, mulTargetReg, treeNode, regOp, rmOp);
// Move the result to the desired register, if necessary
if (ins == INS_mulEAX)
@@ -4406,23 +4430,24 @@ void CodeGen::genCodeForLockAdd(GenTreeOp* node)
if (imm == 1)
{
// inc [addr]
- GetEmitter()->emitIns_AR(INS_inc, size, addr->GetRegNum(), 0);
+ GetEmitter()->emitIns_AR(INS_inc, size, addr->GetRegNum(), 0, INS_OPTS_EVEX_NoApxPromotion);
}
else if (imm == -1)
{
// dec [addr]
- GetEmitter()->emitIns_AR(INS_dec, size, addr->GetRegNum(), 0);
+ GetEmitter()->emitIns_AR(INS_dec, size, addr->GetRegNum(), 0, INS_OPTS_EVEX_NoApxPromotion);
}
else
{
// add [addr], imm
- GetEmitter()->emitIns_I_AR(INS_add, size, imm, addr->GetRegNum(), 0);
+ GetEmitter()->emitIns_I_AR(INS_add, size, imm, addr->GetRegNum(), 0, INS_OPTS_EVEX_NoApxPromotion);
}
}
else
{
// add [addr], data
- GetEmitter()->emitIns_AR_R(INS_add, size, data->GetRegNum(), addr->GetRegNum(), 0);
+ GetEmitter()->emitIns_AR_R(INS_add, size, data->GetRegNum(), addr->GetRegNum(), 0,
+ INS_OPTS_EVEX_NoApxPromotion);
}
}
@@ -4459,7 +4484,8 @@ void CodeGen::genLockedInstructions(GenTreeOp* node)
// or/and dword ptr [addrReg], val
//
instGen(INS_lock);
- GetEmitter()->emitIns_AR_R(ins, size, data->GetRegNum(), addr->GetRegNum(), 0);
+ GetEmitter()->emitIns_AR_R(ins, size, data->GetRegNum(), addr->GetRegNum(), 0,
+ INS_OPTS_EVEX_NoApxPromotion);
}
else
{
@@ -4842,11 +4868,10 @@ void CodeGen::genCodeForShift(GenTree* tree)
return;
}
#endif
- // First, move the operand to the destination register and
- // later on perform the shift in-place.
- // (LSRA will try to avoid this situation through preferencing.)
- inst_Mov(targetType, tree->GetRegNum(), operandReg, /* canSkip */ true);
- inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue);
+ ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
+ GetEmitter()->emitIns_BASE_R_R_I(ins, emitTypeSize(tree), tree->GetRegNum(), operandReg, shiftByValue);
+ genProduceReg(tree);
+ return;
}
}
#if defined(TARGET_64BIT)
@@ -4887,8 +4912,7 @@ void CodeGen::genCodeForShift(GenTree* tree)
// The operand to be shifted must not be in ECX
noway_assert(operandReg != REG_RCX);
- inst_Mov(targetType, tree->GetRegNum(), operandReg, /* canSkip */ true);
- inst_RV(ins, tree->GetRegNum(), targetType);
+ GetEmitter()->emitIns_BASE_R_R(ins, emitTypeSize(tree), tree->GetRegNum(), operandReg);
}
genProduceReg(tree);
@@ -9237,6 +9261,96 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
theEmitter->emitIns_S(INS_neg, EA_2BYTE, 0, 0);
theEmitter->emitIns_S(INS_not, EA_2BYTE, 0, 0);
+
+ // APX-EVEX
+
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_2BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_R(INS_or, EA_2BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_R(INS_and, EA_2BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_R(INS_xor, EA_1BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd);
+
+ theEmitter->emitIns_R_R_I(INS_or, EA_2BYTE, REG_R10, REG_EAX, 10565, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_I(INS_or, EA_8BYTE, REG_R10, REG_EAX, 10, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_S(INS_or, EA_8BYTE, REG_R10, REG_EAX, 0, 1, INS_OPTS_EVEX_nd);
+
+ theEmitter->emitIns_R_R(INS_neg, EA_2BYTE, REG_R10, REG_ECX, INS_OPTS_EVEX_nd);
+
+ theEmitter->emitIns_R_R(INS_shl, EA_2BYTE, REG_R11, REG_EAX, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R(INS_shl_1, EA_2BYTE, REG_R11, REG_EAX, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_I(INS_shl_N, EA_2BYTE, REG_R11, REG_ECX, 7, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_I(INS_shl_N, EA_2BYTE, REG_R11, REG_ECX, 7, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_I(INS_rcr_N, EA_2BYTE, REG_R11, REG_ECX, 7, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_I(INS_rcl_N, EA_2BYTE, REG_R11, REG_ECX, 7, INS_OPTS_EVEX_nd);
+
+ theEmitter->emitIns_R_R(INS_inc, EA_2BYTE, REG_R11, REG_ECX, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R(INS_dec, EA_2BYTE, REG_R11, REG_ECX, INS_OPTS_EVEX_nd);
+
+ theEmitter->emitIns_R_R_R(INS_cmovo, EA_4BYTE, REG_R12, REG_R11, REG_EAX, INS_OPTS_EVEX_nd);
+
+ theEmitter->emitIns_R_R_R(INS_imul, EA_4BYTE, REG_R12, REG_R11, REG_ECX, INS_OPTS_EVEX_nd);
+ theEmitter->emitIns_R_R_S(INS_imul, EA_4BYTE, REG_R12, REG_R11, 0, 1, INS_OPTS_EVEX_nd);
+
+ theEmitter->emitIns_R_R(INS_add, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_R(INS_sub, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_R(INS_and, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_R(INS_or, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_R(INS_xor, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R(INS_inc, EA_4BYTE, REG_R12, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R(INS_dec, EA_4BYTE, REG_R12, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R_I(INS_add, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_I(INS_sub, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_I(INS_and, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_I(INS_or, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_I(INS_xor, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R_S(INS_add, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_S(INS_sub, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_S(INS_and, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_S(INS_or, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_S(INS_xor, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R(INS_neg, EA_2BYTE, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R(INS_shl, EA_2BYTE, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R(INS_shl_1, EA_2BYTE, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_I(INS_shl_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_I(INS_shl_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_I(INS_rcr_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_I(INS_rcl_N, EA_2BYTE, REG_R11, 7, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R_R(INS_imul, EA_4BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_S(INS_imul, EA_4BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R_I(INS_imul_15, EA_4BYTE, REG_R12, 5, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R(INS_imulEAX, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R(INS_mulEAX, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R(INS_div, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R(INS_idiv, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R_R(INS_tzcnt_apx, EA_8BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_R(INS_lzcnt_apx, EA_8BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_R(INS_popcnt_apx, EA_8BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R_S(INS_tzcnt_apx, EA_8BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_S(INS_lzcnt_apx, EA_8BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_S(INS_popcnt_apx, EA_8BYTE, REG_R12, 0, 1, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R_R_R(INS_add, EA_2BYTE, REG_R12, REG_R13, REG_R11,
+ (insOpts)(INS_OPTS_EVEX_nf | INS_OPTS_EVEX_nd));
+
+ theEmitter->emitIns_R_R_R(INS_andn, EA_8BYTE, REG_R11, REG_R13, REG_R11, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_R_R(INS_bextr, EA_8BYTE, REG_R11, REG_R13, REG_R11, INS_OPTS_EVEX_nf);
+
+ theEmitter->emitIns_R_R(INS_blsi, EA_8BYTE, REG_R11, REG_R13, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_R(INS_blsmsk, EA_8BYTE, REG_R11, REG_R13, INS_OPTS_EVEX_nf);
+ theEmitter->emitIns_R_S(INS_blsr, EA_8BYTE, REG_R11, 0, 1);
+
+ theEmitter->emitIns_AR(INS_inc, EA_4BYTE, REG_EAX, 0, INS_OPTS_EVEX_NoApxPromotion);
+
+ theEmitter->emitIns_BASE_R_R(INS_inc, EA_4BYTE, REG_R11, REG_R12);
+ theEmitter->emitIns_BASE_R_R_I(INS_add, EA_4BYTE, REG_R11, REG_R12, 5);
}
void CodeGen::genAmd64EmitterUnitTestsAvx10v2()
@@ -11434,7 +11548,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
if (barrierKind == BARRIER_FULL)
{
instGen(INS_lock);
- GetEmitter()->emitIns_I_AR(INS_or, EA_4BYTE, 0, REG_SPBASE, 0);
+ GetEmitter()->emitIns_I_AR(INS_or, EA_4BYTE, 0, REG_SPBASE, 0, INS_OPTS_EVEX_NoApxPromotion);
}
}
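The codegen changes above share one idea: with APX-EVEX.ND, a binary operation gets a destination distinct from its sources, so the mov that the two-operand RMW encodings forced can be dropped. A rough sketch using the emitter entry points exercised by the new unit tests (register choices arbitrary, emit being the live emitter; a sketch, not the exact lowering code):

// Without NDD: the destination must alias a source, so copy first (RMW form):
//     mov r10, rax
//     add r10, rcx
emit->emitIns_Mov(INS_mov, EA_8BYTE, REG_R10, REG_EAX, /* canSkip */ true);
emit->emitIns_R_R(INS_add, EA_8BYTE, REG_R10, REG_ECX);

// With NDD (DoJitUseApxNDD(INS_add) returns true): one promoted instruction
// whose destination is distinct from both sources:
//     add r10, rax, rcx
emit->emitIns_R_R_R(INS_add, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, INS_OPTS_EVEX_nd);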
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 33dda8c734ca1b..a1e84a95c89a48 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -2299,6 +2299,7 @@ void Compiler::compSetProcessor()
if (canUseApxEncoding())
{
codeGen->GetEmitter()->SetUseRex2Encoding(true);
+ codeGen->GetEmitter()->SetUsePromotedEVEXEncoding(true);
}
}
#endif // TARGET_XARCH
@@ -4871,11 +4872,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
DoPhase(this, PHASE_COMPUTE_DOMINATORS, &Compiler::fgComputeDominators);
}
- // Drop back to just checking profile likelihoods.
- //
- activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
- activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;
-
#ifdef DEBUG
fgDebugCheckLinks();
#endif
@@ -5156,11 +5152,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optSwitchRecognition);
}
- // Drop back to just checking profile likelihoods.
- //
- activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
- activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;
-
#ifdef DEBUG
// Stash the current estimate of the function's size if necessary.
if (verbose && opts.OptimizationEnabled())
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 442dd8f17a5a24..df37e7ff272fde 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -4014,7 +4014,7 @@ class Compiler
// false: we can add new tracked variables.
// true: We cannot add new 'tracked' variable
- bool lvaTrackedFixed = false;
+ bool lvaTrackedFixed = false;
unsigned lvaCount; // total number of locals, which includes function arguments,
// special arguments, IL local variables, and JIT temporary variables
@@ -6310,8 +6310,7 @@ class Compiler
void fgPrintEdgeWeights();
#endif
PhaseStatus fgComputeBlockWeights();
- bool fgComputeMissingBlockWeights(weight_t* returnWeight);
- bool fgComputeCalledCount(weight_t returnWeight);
+ bool fgComputeMissingBlockWeights();
bool fgReorderBlocks(bool useProfile);
void fgDoReversePostOrderLayout();
@@ -6327,7 +6326,6 @@ class Compiler
BasicBlock** blockOrder;
BasicBlock** tempOrder;
unsigned numCandidateBlocks;
- unsigned currEHRegion;
#ifdef DEBUG
weight_t GetLayoutCost(unsigned startPos, unsigned endPos);
@@ -6342,7 +6340,7 @@ class Compiler
void AddNonFallthroughPreds(unsigned blockPos);
bool RunGreedyThreeOptPass(unsigned startPos, unsigned endPos);
- bool RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock);
+ bool RunThreeOptPass();
public:
ThreeOptLayout(Compiler* comp);
@@ -6925,7 +6923,7 @@ class Compiler
unsigned acdCount = 0;
// Get the index to use as part of the AddCodeDsc key for sharing throw blocks
- unsigned bbThrowIndex(BasicBlock* blk, AcdKeyDesignator* dsg);
+ unsigned bbThrowIndex(BasicBlock* blk, AcdKeyDesignator* dsg);
struct AddCodeDscKey
{
@@ -6933,7 +6931,7 @@ class Compiler
AddCodeDscKey(): acdKind(SCK_NONE), acdData(0) {}
AddCodeDscKey(SpecialCodeKind kind, BasicBlock* block, Compiler* comp);
AddCodeDscKey(AddCodeDsc* add);
-
+
static bool Equals(const AddCodeDscKey& x, const AddCodeDscKey& y)
{
return (x.acdData == y.acdData) && (x.acdKind == y.acdKind);
@@ -10014,10 +10012,10 @@ class Compiler
}
//------------------------------------------------------------------------
- // canUseRex2Encoding - Answer the question: Is Rex2 encoding supported on this target.
+ // canUseApxEncoding - Answer the question: Are APX encodings supported on this target.
//
// Returns:
- // `true` if Rex2 encoding is supported, `false` if not.
+ // `true` if APX encoding is supported, `false` if not.
//
bool canUseApxEncoding() const
{
@@ -10069,7 +10067,7 @@ class Compiler
bool DoJitStressRex2Encoding() const
{
#ifdef DEBUG
- if (JitConfig.JitStressRex2Encoding() && compOpportunisticallyDependsOn(InstructionSet_APX))
+ if (JitConfig.JitStressRex2Encoding())
{
// we should make sure EVEX is also stressed when REX2 is stressed, as we will need to guarantee EGPR
// functionality is properly turned on for every instructions when REX2 is stress.
@@ -10084,13 +10082,30 @@ class Compiler
// JitStressEvexEncoding- Answer the question: Is Evex stress knob set
//
// Returns:
- // `true` if user requests REX2 encoding.
+ // `true` if user requests EVEX encoding.
//
bool JitStressEvexEncoding() const
{
#ifdef DEBUG
return JitConfig.JitStressEvexEncoding() || JitConfig.JitStressRex2Encoding();
#endif // DEBUG
+ return false;
+ }
+
+ //------------------------------------------------------------------------
+ // DoJitStressPromotedEvexEncoding- Answer the question: Do we force promoted EVEX encoding.
+ //
+ // Returns:
+ // `true` if user requests promoted EVEX encoding.
+ //
+ bool DoJitStressPromotedEvexEncoding() const
+ {
+#ifdef DEBUG
+ if (JitConfig.JitStressPromotedEvexEncoding())
+ {
+ return true;
+ }
+#endif // DEBUG
return false;
}
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 02461633f3c547..e8854e78a2200d 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -471,6 +471,7 @@ class emitter
SetUseVEXEncoding(false);
SetUseEvexEncoding(false);
SetUseRex2Encoding(false);
+ SetUsePromotedEVEXEncoding(false);
#endif // TARGET_XARCH
emitDataSecCur = nullptr;
@@ -793,7 +794,19 @@ class emitter
// For normal and embedded broadcast intrinsics, EVEX.L'L has the same semantic, vector length.
// For embedded rounding, EVEX.L'L semantic changes to indicate the rounding mode.
// Multiple bits in _idEvexbContext are used to inform emitter to specially handle the EVEX.L'L bits.
- unsigned _idEvexbContext : 2;
+ unsigned _idCustom5 : 1;
+ unsigned _idCustom6 : 1;
+
+#define _idEvexbContext ((_idCustom6 << 1) | _idCustom5) /* Evex.b: embedded broadcast, embedded rounding, embedded SAE */
+#define _idEvexNdContext _idCustom5 /* bits used for the APX-EVEX.nd context for promoted legacy instructions */
+#define _idEvexNfContext _idCustom6 /* bits used for the APX-EVEX.nf context for promoted legacy/vex instructions */
+
+ // In certain cases, we do not allow instructions to be promoted to APX-EVEX.
+ // e.g. instructions like add/and/or/inc/dec can be used with the LOCK prefix, but cannot carry LOCK and
+ // EVEX prefixes together.
+ unsigned _idNoApxEvexXPromotion : 1;
#endif // TARGET_XARCH
#ifdef TARGET_ARM64
@@ -826,8 +839,8 @@ class emitter
////////////////////////////////////////////////////////////////////////
// Space taken up to here:
- // x86: 48 bits
- // amd64: 48 bits
+ // x86: 49 bits
+ // amd64: 49 bits
// arm: 48 bits
// arm64: 55 bits
// loongarch64: 46 bits
@@ -845,7 +858,7 @@ class emitter
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
#define ID_EXTRA_BITFIELD_BITS (14)
#elif defined(TARGET_XARCH)
-#define ID_EXTRA_BITFIELD_BITS (16)
+#define ID_EXTRA_BITFIELD_BITS (17)
#else
#error Unsupported or unset target architecture
#endif
@@ -879,8 +892,8 @@ class emitter
////////////////////////////////////////////////////////////////////////
// Space taken up to here (with/without prev offset, assuming host==target):
- // x86: 54/50 bits
- // amd64: 55/50 bits
+ // x86: 55/51 bits
+ // amd64: 56/51 bits
// arm: 54/50 bits
// arm64: 62/57 bits
// loongarch64: 53/48 bits
@@ -1657,38 +1670,17 @@ class emitter
#ifdef TARGET_XARCH
bool idIsEvexbContextSet() const
{
- return _idEvexbContext != 0;
+ return idGetEvexbContext() != 0;
}
void idSetEvexbContext(insOpts instOptions)
{
assert(!idIsEvexbContextSet());
+ assert(idGetEvexbContext() == 0);
+ unsigned value = static_cast<unsigned>(instOptions & INS_OPTS_EVEX_b_MASK);
- switch (instOptions & INS_OPTS_EVEX_b_MASK)
- {
- case INS_OPTS_EVEX_eb_er_rd:
- {
- _idEvexbContext = 1;
- break;
- }
-
- case INS_OPTS_EVEX_er_ru:
- {
- _idEvexbContext = 2;
- break;
- }
-
- case INS_OPTS_EVEX_er_rz:
- {
- _idEvexbContext = 3;
- break;
- }
-
- default:
- {
- unreached();
- }
- }
+ _idCustom5 = ((value >> 0) & 1);
+ _idCustom6 = ((value >> 1) & 1);
}
unsigned idGetEvexbContext() const
@@ -1728,6 +1720,39 @@ class emitter
assert(!idIsEvexZContextSet());
_idEvexZContext = 1;
}
+
+ bool idIsEvexNdContextSet() const
+ {
+ return _idEvexNdContext != 0;
+ }
+
+ void idSetEvexNdContext()
+ {
+ assert(!idIsEvexNdContextSet());
+ _idEvexNdContext = 1;
+ }
+
+ bool idIsEvexNfContextSet() const
+ {
+ return _idEvexNfContext != 0;
+ }
+
+ void idSetEvexNfContext()
+ {
+ assert(!idIsEvexNfContextSet());
+ _idEvexNfContext = 1;
+ }
+
+ bool idIsNoApxEvexPromotion() const
+ {
+ return _idNoApxEvexXPromotion != 0;
+ }
+
+ void idSetNoApxEvexPromotion()
+ {
+ assert(!idIsNoApxEvexPromotion());
+ _idNoApxEvexXPromotion = 1;
+ }
#endif
#ifdef TARGET_ARMARCH
@@ -2531,7 +2556,12 @@ class emitter
CORINFO_FIELD_HANDLE emitSimdMaskConst(simdmask_t constValue);
#endif // FEATURE_MASKED_HW_INTRINSICS
#endif // FEATURE_SIMD
+
+#if defined(TARGET_XARCH)
+ regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src, regNumber targetReg = REG_NA);
+#else
regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src);
+#endif
regNumber emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2);
void emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem);
void emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem);
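Since _idCustom5/_idCustom6 now do double duty (the 2-bit EVEX.b context for SIMD instructions, the ND/NF flags for promoted legacy instructions), a standalone mock may help picture the packing; this is a hypothetical illustration, not the real instrDesc:

struct InsDescBitsMock
{
    unsigned custom5 : 1;             // EVEX.b low bit, or the APX-EVEX.nd flag
    unsigned custom6 : 1;             // EVEX.b high bit, or the APX-EVEX.nf flag
    unsigned noApxEvexPromotion : 1;  // blocks EVEX promotion (e.g. for LOCK-prefixed ops)

    // SIMD view: the two bits together form the 2-bit EVEX.b context.
    unsigned EvexbContext() const { return (custom6 << 1) | custom5; }

    // APX view: each bit is an independent flag on a promoted instruction.
    bool EvexNdContext() const { return custom5 != 0; }
    bool EvexNfContext() const { return custom6 != 0; }
};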
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index 3fd34318d9ec11..10d344b22c02fa 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -12311,7 +12311,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
// If there are 2 GC vars in this instrDesc, get the 2nd variable
// that should be tracked.
- adr2 = emitComp->lvaFrameAddress(varNum2, &FPbased2, true);
+ adr2 = emitComp->lvaFrameAddress(varNum2, &FPbased2, FPbased);
ofs2Dist = EA_SIZE_IN_BYTES(size);
#ifdef DEBUG
assert(FPbased == FPbased2);
diff --git a/src/coreclr/jit/emitfmtsxarch.h b/src/coreclr/jit/emitfmtsxarch.h
index f893fce8d07eea..a94a7c1b3e7d5b 100644
--- a/src/coreclr/jit/emitfmtsxarch.h
+++ b/src/coreclr/jit/emitfmtsxarch.h
@@ -140,6 +140,7 @@ IF_DEF(RRW_RRW, IS_R1_RW|IS_R2_RW, NONE) // r/w
IF_DEF(RRD_RRD_CNS, IS_R1_RD|IS_R2_RD, SCNS) // read reg1, read reg2, const
IF_DEF(RWR_RRD_CNS, IS_R1_WR|IS_R2_RD, SCNS) // write reg1, read reg2, const
IF_DEF(RRW_RRD_CNS, IS_R1_RW|IS_R2_RD, SCNS) // r/w reg1, read reg2, const
+IF_DEF(RWR_RRD_SHF, IS_R1_WR|IS_R2_RD, SCNS) // write reg1, read reg2, shift
IF_DEF(RRD_RRD_RRD, IS_R1_RD|IS_R2_RD|IS_R3_RD, NONE) // read reg1, read reg2, read reg3
IF_DEF(RWR_RRD_RRD, IS_R1_WR|IS_R2_RD|IS_R3_RD, NONE) // write reg1, read reg2, read reg3
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index ddec8af5e753f5..8b51ff52f181f8 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -236,6 +236,18 @@ bool emitter::HasRex2Encoding(instruction ins) const
return (flags & Encoding_REX2) != 0;
}
+bool emitter::HasApxNdd(instruction ins) const
+{
+ insFlags flags = CodeGenInterface::instInfo[ins];
+ return (flags & INS_Flags_Has_NDD) != 0;
+}
+
+bool emitter::HasApxNf(instruction ins) const
+{
+ insFlags flags = CodeGenInterface::instInfo[ins];
+ return (flags & INS_Flags_Has_NF) != 0;
+}
+
bool emitter::IsVexEncodableInstruction(instruction ins) const
{
if (!UseVEXEncoding())
@@ -293,6 +305,106 @@ bool emitter::IsRex2EncodableInstruction(instruction ins) const
return HasRex2Encoding(ins);
}
+//------------------------------------------------------------------------
+// IsApxNDDEncodableInstruction: Answer the question- does this instruction have apx ndd form.
+//
+// Arguments:
+// ins - The instruction to check.
+//
+// Returns:
+// `true` if ins has apx ndd form.
+//
+bool emitter::IsApxNDDEncodableInstruction(instruction ins) const
+{
+ if (!UsePromotedEVEXEncoding())
+ {
+ return false;
+ }
+
+ return HasApxNdd(ins);
+}
+
+//------------------------------------------------------------------------
+// IsApxNFEncodableInstruction: Answer the question - does this instruction support EVEX.nf.
+//
+// Arguments:
+// ins - The instruction to check.
+//
+// Returns:
+// `true` if EVEX.nf is supported for ins.
+//
+bool emitter::IsApxNFEncodableInstruction(instruction ins) const
+{
+ if (!UsePromotedEVEXEncoding())
+ {
+ return false;
+ }
+
+ return HasApxNf(ins);
+}
+
+//------------------------------------------------------------------------
+// IsApxExtendedEvexInstruction: Answer the question - does this instruction have apx extended evex form.
+//
+// Arguments:
+// ins - The instruction to check.
+//
+// Returns:
+// `true` if ins has apx extended evex form.
+//
+bool emitter::IsApxExtendedEvexInstruction(instruction ins) const
+{
+ if (!UsePromotedEVEXEncoding())
+ {
+ return false;
+ }
+
+ return HasApxNdd(ins) || HasApxNf(ins);
+}
+
+//------------------------------------------------------------------------
+// IsShiftInstruction: Answer the question- is this instruction a shift instruction.
+//
+// Arguments:
+// ins - The instruction to check.
+//
+// Returns:
+// `true` if ins is a shift instruction.
+//
+bool emitter::IsShiftInstruction(instruction ins) const
+{
+ switch (ins)
+ {
+ case INS_rcl_1:
+ case INS_rcr_1:
+ case INS_rol_1:
+ case INS_ror_1:
+ case INS_shl_1:
+ case INS_shr_1:
+ case INS_sar_1:
+
+ case INS_rcl:
+ case INS_rcr:
+ case INS_rol:
+ case INS_ror:
+ case INS_shl:
+ case INS_shr:
+ case INS_sar:
+
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
//------------------------------------------------------------------------
// IsLegacyMap1: Answer the question- Is this instruction on legacy-map-1
//
@@ -324,7 +436,7 @@ bool emitter::IsLegacyMap1(code_t code) const
if ((code & 0xFF00FF00) == 0x0F000000)
{
- // 4-byte, need to check if PP is a prefix.
+ // 4-byte, need to check if PP is one of the prefix bytes (0x66/0xF2/0xF3)
BYTE prefix = (BYTE)((code & 0xFF0000) >> 16);
return ((prefix == 0xF2) || (prefix == 0xF3) || (prefix == 0x66));
}
@@ -647,6 +759,24 @@ bool emitter::IsRexW1EvexInstruction(instruction ins)
return false;
}
+//------------------------------------------------------------------------
+// DoJitUseApxNDD: Answer the question: does JIT use APX NDD feature on the given instruction?
+//
+// Arguments:
+// ins - instruction to test
+//
+// Return Value:
+// true if JIT allows APX NDD to be applied to the instruction.
+//
+bool emitter::DoJitUseApxNDD(instruction ins) const
+{
+#if !defined(TARGET_AMD64)
+ return false;
+#else
+ return JitConfig.EnableApxNDD() && IsApxNDDEncodableInstruction(ins);
+#endif
+}
+
#ifdef TARGET_64BIT
//------------------------------------------------------------------------
// AreUpperBitsZero: check if some previously emitted
@@ -1257,6 +1387,179 @@ insOpts emitter::GetEmbRoundingMode(uint8_t mode) const
}
}
+//------------------------------------------------------------------------
+// emitHandleGCrefRegs: Update GC ref related registers' liveness.
+//
+// Arguments:
+// dst - Destination buffer.
+// id - instruction descriptor to the GC ref instruction.
+//
+void emitter::emitHandleGCrefRegs(BYTE* dst, instrDesc* id)
+{
+ regNumber reg1 = id->idReg1(); // dst and src1
+ regNumber reg2 = id->idReg2(); // src2
+ switch (id->idInsFmt())
+ {
+ case IF_RRD_RRD:
+ break;
+
+ case IF_RWR_RRD:
+ {
+ if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
+ {
+ // We're relocating "this" in the prolog
+ assert(emitComp->lvaIsOriginalThisArg(0));
+ assert(emitComp->lvaTable[0].lvRegister);
+ assert(emitComp->lvaTable[0].GetRegNum() == reg1);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
+ break;
+ }
+ else
+ {
+ /* If emitFullGCinfo==false, then we don't use any
+ regPtrDsc's and so explicitly note the location
+ of "this" in GCEncode.cpp
+ */
+ }
+ }
+
+ emitGCregLiveUpd(id->idGCref(), reg1, dst);
+ break;
+ }
+
+ case IF_RRW_RRD:
+ case IF_RWR_RRD_RRD:
+ {
+ regNumber targetReg = reg1; // dst
+
+ // If the instruction is encoded in NDD form,
+ // the src registers will be the 2nd and 3rd register on id.
+ if (id->idInsFmt() == IF_RWR_RRD_RRD)
+ {
+ reg1 = id->idReg2(); // src1
+ reg2 = id->idReg3(); // src2
+ }
+
+ switch (id->idIns())
+ {
+ /*
+ This must be one of the following cases:
+
+ xor reg, reg to assign NULL
+
+ and r1 , r2 if (ptr1 && ptr2) ...
+ or r1 , r2 if (ptr1 || ptr2) ...
+
+ add r1 , r2 to compute a normal byref
+ sub r1 , r2 to compute a strange byref (VC only)
+
+ */
+ case INS_xor:
+ assert(reg1 == reg2);
+ emitGCregLiveUpd(id->idGCref(), targetReg, dst);
+ break;
+
+ case INS_or:
+ case INS_and:
+ emitGCregDeadUpd(targetReg, dst);
+ break;
+
+ case INS_add:
+ case INS_sub:
+ case INS_sub_hide:
+ assert(id->idGCref() == GCT_BYREF);
+
+#if 0
+#ifdef DEBUG
+ // Due to elided register moves, we can't have the following assert.
+ // For example, consider:
+ // t85 = LCL_VAR byref V01 arg1 rdx (last use) REG rdx
+ // /--* t85 byref
+ // * STORE_LCL_VAR byref V40 tmp31 rdx REG rdx
+ // Here, V01 is type `long` on entry, then is stored as a byref. But because
+ // the register allocator assigned the same register, no instruction was
+ // generated, and we only (currently) make gcref/byref changes in emitter GC info
+ // when an instruction is generated. We still generate correct GC info, as this
+ // instruction, if writing a GC ref even through reading a long, will go live here.
+ // These situations typically occur due to unsafe casting, such as with Span.
+
+ regMaskTP regMask;
+ regMask = genRegMask(reg1) | genRegMask(reg2);
+
+ // r1/r2 could have been a GCREF as GCREF + int=BYREF
+ // or BYREF+/-int=BYREF
+ assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
+ ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide)));
+#endif // DEBUG
+#endif // 0
+
+ // Mark r1 as holding a byref
+ emitGCregLiveUpd(GCT_BYREF, targetReg, dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC reg update instruction");
+ }
+
+ break;
+ }
+
+ case IF_RRW_RRW:
+ {
+ // This must be "xchg reg1, reg2"
+ assert(id->idIns() == INS_xchg);
+
+ // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
+ // register pointer mask.
+
+ GCtype gc1, gc2;
+
+ gc1 = emitRegGCtype(reg1);
+ gc2 = emitRegGCtype(reg2);
+
+ if (gc1 != gc2)
+ {
+ // Kill the GC-info about the GC registers
+
+ if (needsGC(gc1))
+ {
+ emitGCregDeadUpd(reg1, dst);
+ }
+
+ if (needsGC(gc2))
+ {
+ emitGCregDeadUpd(reg2, dst);
+ }
+
+ // Now, swap the info
+
+ if (needsGC(gc1))
+ {
+ emitGCregLiveUpd(gc1, reg2, dst);
+ }
+
+ if (needsGC(gc2))
+ {
+ emitGCregLiveUpd(gc2, reg1, dst);
+ }
+ }
+ break;
+ }
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+}
+
//------------------------------------------------------------------------
// encodeRegAsIval: Encodes a register as an ival for use by a SIMD instruction
//
@@ -1343,9 +1646,23 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
return true;
}
+ if (id->idIsEvexNfContextSet() && IsBMIInstruction(ins))
+ {
+ // Only a few BMI instructions shall be promoted to APX-EVEX due to the NF feature.
+ // TODO-XArch-APX: convert the check into forms like Has* as above.
+ return true;
+ }
+
#if defined(DEBUG)
if (emitComp->DoJitStressEvexEncoding())
{
+ if (IsBMIInstruction(ins))
+ {
+ // The Encoding_EVEX flag on some BMI instructions is set only because of APX,
+ // so they cannot be stressed with JitStressEvexEncoding.
+ return false;
+ }
+
// Requires the EVEX encoding due to STRESS mode and no change in semantics
//
// Some instructions, like VCMPEQW return the value in a SIMD register for
@@ -1354,6 +1671,12 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
// check above so we need to still return false here to preserve semantics.
return !HasKMaskRegisterDest(ins);
}
+
+ if (IsApxExtendedEvexInstruction(ins) && emitComp->DoJitStressPromotedEvexEncoding())
+ {
+ // This path will be hit when we stress APX-EVEX and encode VEX with Extended EVEX.
+ return (IsBMIInstruction(ins) && HasApxNf(ins));
+ }
#endif // DEBUG
if ((ins == INS_pslldq) || (ins == INS_psrldq))
@@ -1408,6 +1731,57 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const
return false;
}
+//------------------------------------------------------------------------
+// TakesApxExtendedEvexPrefix: Checks if the instruction should be legacy-promoted-EVEX encoded.
+//
+// Arguments:
+// id -- instruction descriptor to check
+//
+// Return Value:
+// true if this instruction requires a legacy-promoted-EVEX prefix.
+//
+bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const
+{
+ // TODO-XArch-APX:
+    // Isolate the legacy-promoted-EVEX case from the VEX/EVEX-promoted-EVEX cases,
+    // as the latter are relatively simple, only providing EGPR functionality.
+ instruction ins = id->idIns();
+ if (!IsApxExtendedEvexInstruction(ins))
+ {
+ return false;
+ }
+
+ if (IsSimdInstruction(ins))
+ {
+ // This check should reject any instruction not from legacy map-0 or 1.
+ return false;
+ }
+
+ if (id->idIsNoApxEvexPromotion())
+ {
+ return false;
+ }
+
+ if (id->idIsEvexNdContextSet())
+ {
+ return true;
+ }
+
+ if (id->idIsEvexNfContextSet())
+ {
+ return true;
+ }
+
+#if defined(DEBUG)
+ if (emitComp->DoJitStressPromotedEvexEncoding())
+ {
+ return true;
+ }
+#endif // DEBUG
+
+ return false;
+}
+
// Intel AVX-512 encoding is defined in "Intel 64 and ia-32 architectures software developer's manual volume 2", Section
// 2.6.
// Add base EVEX prefix without setting W, R, X, or B bits
@@ -1450,6 +1824,10 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const
#define ZBIT_IN_BYTE_EVEX_PREFIX 0x0000008000000000ULL
#define uBIT_IN_BYTE_EVEX_PREFIX 0x0000040000000000ULL
+#define MAP4_IN_BYTE_EVEX_PREFIX 0x4000000000000ULL
+#define ND_BIT_IN_BYTE_EVEX_PREFIX 0x1000000000ULL
+#define NF_BIT_IN_BYTE_EVEX_PREFIX 0x400000000ULL
+#define EXTENDED_EVEX_PP_BITS 0x10000000000ULL
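+// A rough map of where these bits land, assuming the EVEX prefix occupies the
+// upper four bytes of code_t as [0x62][P0][P1][P2] (see emitExtractEvexPrefix):
+//   MAP4_IN_BYTE_EVEX_PREFIX   - P0 mmm bits, selects extended legacy map 4
+//   ND_BIT_IN_BYTE_EVEX_PREFIX - P2 bit 4 (shared with EVEX.b), new data destination
+//   NF_BIT_IN_BYTE_EVEX_PREFIX - P2 bit 2, "no flags" suppression
+//   EXTENDED_EVEX_PP_BITS      - P1 pp bits set to 01, the embedded 0x66 prefix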
//------------------------------------------------------------------------
// AddEvexPrefix: Add default EVEX prefix with only LL' bits set.
//
@@ -1459,19 +1837,72 @@ bool emitter::TakesRex2Prefix(const instrDesc* id) const
// attr -- operand size
//
// Return Value:
-// encoded code with Evex prefix.
+// encoded code with EVEX prefix.
//
emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAttr attr)
{
// Only AVX512 instructions require EVEX prefix
- assert(IsEvexEncodableInstruction(id->idIns()));
+ // After APX, some instructions in legacy or VEX space will be promoted to EVEX.
+ instruction ins = id->idIns();
+ assert(IsEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins));
+
+ if (instrIsExtendedReg3opImul(ins))
+ {
+        // The only case where imul (0x68) needs an EVEX prefix is when the EVEX.NF
+        // feature is enabled. The imul (0x68) opcode comes with a ModR/M.REG byte to
+        // indicate implicit register use; when it uses extended registers (>= REG_R8),
+        // it comes with a built-in REX prefix. Remove that first and add the
+        // counterpart bits in EVEX.
+ code &= 0xFFFFFFFF;
+ }
// Shouldn't have already added EVEX prefix
assert(!hasEvexPrefix(code));
- assert((code & DEFAULT_BYTE_EVEX_PREFIX_MASK) == 0);
+ assert((code & DEFAULT_BYTE_EVEX_PREFIX_MASK) == 0);
+
+ code |= DEFAULT_BYTE_EVEX_PREFIX;
+
+ if (IsApxExtendedEvexInstruction(ins))
+ {
+ if (!HasEvexEncoding(ins))
+ {
+            // Legacy-promoted instructions are not labeled with Encoding_EVEX.
+ code |= MAP4_IN_BYTE_EVEX_PREFIX;
+ }
+
+ // TODO-XArch-APX:
+ // verify if it is actually safe to reuse the EVEX.ND with EVEX.B on instrDesc.
+ if (id->idIsEvexNdContextSet())
+ {
+ code |= ND_BIT_IN_BYTE_EVEX_PREFIX;
+ }
+
+ if (id->idIsEvexNfContextSet())
+ {
+ code |= NF_BIT_IN_BYTE_EVEX_PREFIX;
+ }
+
+ if (attr == EA_2BYTE)
+ {
+ code |= EXTENDED_EVEX_PP_BITS;
+ }
+
+ if (instrIsExtendedReg3opImul(ins))
+ {
+ // EVEX.R3
+ // TODO-XArch-APX:
+            // A few side notes: based on how the JIT defines IMUL, we may need to extend
+            // the definition to `IMUL_31` to cover EGPRs. It could be defined in a
+            // similar way, with opcodes that come with a built-in REX2 prefix, and
+            // converted to EVEX when needed with some helper functions.
+ code &= 0xFF7FFFFFFFFFFFFFULL;
+ }
- code |= DEFAULT_BYTE_EVEX_PREFIX;
+ return code;
+ }
+
+    // No APX-promoted EVEX instructions should reach the code below.
+ assert(!IsApxExtendedEvexInstruction(ins));
if (attr == EA_32BYTE)
{
@@ -2022,6 +2453,14 @@ emitter::code_t emitter::AddRexWPrefix(const instrDesc* id, code_t code)
}
}
#ifdef TARGET_AMD64
+ else if (TakesApxExtendedEvexPrefix(id))
+ {
+ // If the instruction is not VEX/EVEX encodable, and has EVEX prefix,
+ // then it is legacy promoted EVEX.
+ assert(hasEvexPrefix(code));
+ assert(IsApxExtendedEvexInstruction(ins));
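+        // Set EVEX.W (payload byte P1, bit 7) to select the 64-bit operand size.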
+ return emitter::code_t(code | 0x0000800000000000ULL);
+ }
else if (hasRex2Prefix(code))
{
return emitter::code_t(code | 0x000800000000ULL);
@@ -2060,13 +2499,18 @@ emitter::code_t emitter::AddRexRPrefix(const instrDesc* id, code_t code)
return code & 0xFF7FFFFFFFFFFFULL;
}
}
-#ifdef TARGET_AMD64
+ else if (TakesApxExtendedEvexPrefix(id))
+ {
+ assert(hasEvexPrefix(code));
+ assert(IsApxExtendedEvexInstruction(ins));
+ // R-bit is added in bit-inverted form.
+ return code & 0xFF7FFFFFFFFFFFFFULL;
+ }
else if (TakesRex2Prefix(id))
{
assert(IsRex2EncodableInstruction(ins));
return code |= 0xD50400000000ULL; // REX2.R3
}
-#endif // TARGET_AMD64
return code | 0x4400000000ULL;
}
@@ -2096,13 +2540,18 @@ emitter::code_t emitter::AddRexXPrefix(const instrDesc* id, code_t code)
return code & 0xFFBFFFFFFFFFFFULL;
}
}
-#ifdef TARGET_AMD64
+ else if (TakesApxExtendedEvexPrefix(id))
+ {
+ assert(hasEvexPrefix(code));
+ assert(IsApxExtendedEvexInstruction(ins));
+ // X-bit is added in bit-inverted form.
+ return code & 0xFFBFFFFFFFFFFFFFULL;
+ }
else if (TakesRex2Prefix(id))
{
assert(IsRex2EncodableInstruction(ins));
return code |= 0xD50200000000ULL; // REX2.X3
}
-#endif // TARGET_AMD64
return code | 0x4200000000ULL;
}
@@ -2132,13 +2581,17 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code)
return code & 0xFFDFFFFFFFFFFFULL;
}
}
-#ifdef TARGET_AMD64
+ else if (TakesApxExtendedEvexPrefix(id))
+ {
+ assert(IsApxExtendedEvexInstruction(ins));
+        // B-bit is added in bit-inverted form.
+ return code & 0xFFDFFFFFFFFFFFFFULL;
+ }
else if (TakesRex2Prefix(id))
{
assert(IsRex2EncodableInstruction(ins));
return code |= 0xD50100000000ULL; // REX2.B3
}
-#endif // TARGET_AMD64
return code | 0x4100000000ULL;
}
@@ -2221,7 +2674,7 @@ bool isPrefix(BYTE b)
//
emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) const
{
- assert(IsEvexEncodableInstruction(ins));
+ assert(IsEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins));
code_t evexPrefix = (code >> 32) & 0xFFFFFFFF;
code &= 0x00000000FFFFFFFFLL;
@@ -2253,6 +2706,14 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
case 0x66:
{
// None of the existing BMI instructions should be EVEX encoded.
+            // After APX, BMI instructions can be EVEX encoded with the NF feature.
+ if (IsBMIInstruction(ins))
+ {
+                // If a BMI instruction reaches this point, it must be APX-EVEX encoded.
+                // Although the opcode of every BMI instruction is defined with 0x66,
+                // that byte must not be encoded as the EVEX.pp bits, so skip this check.
+ break;
+ }
assert(!IsBMIInstruction(ins));
evexPrefix |= (0x01 << 8);
break;
@@ -2306,9 +2767,14 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
// 2-byte opcode with the bytes ordered as 0x0011RM22. There are 2 possibilities here:
// 1. the byte in position 11 must be an escape byte.
// 2. the byte in position 11 must be a map number from 0 to 7.
+
+    // APX promoted EVEX instructions might also take this path, so the opcode can also be 1-byte, in the form of
+    // 0x0000RM11.
leadingBytes = (code >> 16) & 0xFF;
- assert(leadingBytes == 0x0F || (emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) &&
- leadingBytes >= 0x00 && leadingBytes <= 0x07));
+ assert(leadingBytes == 0x0F ||
+ (emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) && leadingBytes >= 0x00 &&
+ leadingBytes <= 0x07) ||
+ (IsApxExtendedEvexInstruction(ins) && leadingBytes == 0));
code &= 0xFFFF;
}
@@ -2330,6 +2796,12 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
case 0x0F:
{
+ if (((evexPrefix >> 16) & 0x07) == 0x04)
+ {
+            // A MAP index equal to 4 indicates this instruction is a promoted legacy instruction.
+            // The MAP ID was already set when the EVEX prefix was added.
+ break;
+ }
evexPrefix |= (0x01 << 16);
break;
}
@@ -2803,6 +3275,11 @@ unsigned emitter::emitGetRexPrefixSize(instrDesc* id, instruction ins)
return 0;
}
+ if (TakesApxExtendedEvexPrefix(id))
+ {
+ return 0;
+ }
+
if (TakesRex2Prefix(id))
{
return 0;
@@ -2913,10 +3390,20 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const
adjustedSize++;
}
#ifdef TARGET_AMD64
- else if (IsRex2EncodableInstruction(ins))
+ else if (IsRex2EncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins))
{
unsigned prefixAdjustedSize = 0;
- if (TakesRex2Prefix(id))
+ if (TakesApxExtendedEvexPrefix(id))
+ {
+ prefixAdjustedSize = 4;
+ // If the opcode will be prefixed by EVEX, then all the map-1-legacy instructions can remove the escape
+ // prefix
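+            // e.g. a map-1 opcode such as movzx (0F B6) drops its 0F escape byte
+            // under promoted EVEX, so the net prefix cost is +3 bytes, not +4.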
+ if (IsLegacyMap1(code))
+ {
+ prefixAdjustedSize -= 1;
+ }
+ }
+ else if (TakesRex2Prefix(id))
{
prefixAdjustedSize = 2;
// If the opcode will be prefixed by REX2, then all the map-1-legacy instructions can remove the escape
@@ -2927,15 +3414,14 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const
}
}
- adjustedSize = prefixAdjustedSize;
-
emitAttr attr = id->idOpSize();
-
- if ((attr == EA_2BYTE) && (ins != INS_movzx) && (ins != INS_movsx))
+ if ((attr == EA_2BYTE) && (ins != INS_movzx) && (ins != INS_movsx) && !TakesApxExtendedEvexPrefix(id))
{
// Most 16-bit operand instructions will need a 0x66 prefix.
- adjustedSize++;
+ prefixAdjustedSize++;
}
+
+ adjustedSize = prefixAdjustedSize;
}
#endif // TARGET_AMD64
else
@@ -2987,6 +3473,14 @@ unsigned emitter::emitGetPrefixSize(instrDesc* id, code_t code, bool includeRexP
if (includeRexPrefixSize && hasRexPrefix(code))
{
+ if (instrIsExtendedReg3opImul(id->idIns()) && TakesApxExtendedEvexPrefix(id))
+ {
+        // There is a special case when calculating the size of IMUL with APX-EVEX:
+        // IMUL_08 and beyond have a built-in REX prefix in their opcode, so they hit
+        // this branch; but when IMUL is encoded with APX-EVEX, the size of REX is
+        // included in the prefix size, which is calculated elsewhere.
+ return 0;
+ }
return 1;
}
@@ -3628,7 +4122,7 @@ inline unsigned emitter::insEncodeReg012(const instrDesc* id, regNumber reg, emi
{
// We are assuming that we only use/encode SPL, BPL, SIL and DIL
// not the corresponding AH, CH, DH, or BH
- *code = hasRex2Prefix(*code) ? *code : AddRexPrefix(ins, *code); // REX
+ *code = (hasRex2Prefix(*code) || hasEvexPrefix(*code)) ? *code : AddRexPrefix(ins, *code); // REX
}
#endif // TARGET_AMD64
@@ -3668,7 +4162,7 @@ inline unsigned emitter::insEncodeReg345(const instrDesc* id, regNumber reg, emi
}
if (false /*reg >= REG_R16 && reg <= REG_R31*/)
{
- // seperate the encoding for REX2.R3/R4, REX2.R3 will be handled in `AddRexRPrefix`.
+    // Separate the encoding for REX2.R3/R4, REX2.R3 will be handled in `AddRexRPrefix`.
assert(TakesRex2Prefix(id));
*code |= 0x004000000000ULL; // REX2.R4
}
@@ -3677,7 +4171,7 @@ inline unsigned emitter::insEncodeReg345(const instrDesc* id, regNumber reg, emi
{
// We are assuming that we only use/encode SPL, BPL, SIL and DIL
// not the corresponding AH, CH, DH, or BH
- *code = hasRex2Prefix(*code) ? *code : AddRexPrefix(ins, *code); // REX
+ *code = (hasRex2Prefix(*code) || hasEvexPrefix(*code)) ? *code : AddRexPrefix(ins, *code); // REX
}
#endif // TARGET_AMD64
@@ -3697,7 +4191,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
instruction ins = id->idIns();
assert(reg < REG_STK);
- assert(IsVexOrEvexEncodableInstruction(ins));
+ assert(IsVexOrEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins));
assert(hasVexOrEvexPrefix(code));
// Get 4-bit register encoding
@@ -3744,6 +4238,25 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
return code ^ regBits;
}
}
+ else
+ {
+ assert(TakesApxExtendedEvexPrefix(id));
+ assert(hasEvexPrefix(code));
+#if defined(TARGET_AMD64)
+ // TODO-XARCH-AVX512 I don't like that we redefine regBits on the EVEX case.
+ // Rather see these paths cleaned up.
+ regBits = HighAwareRegEncoding(reg);
+
+ if (false /*reg >= REG_R16 && reg <= REG_R31*/)
+ {
+ // Have to set the EVEX V' bit
+ code = AddEvexVPrimePrefix(code);
+ }
+#endif
+ // Shift count = 5-bytes of opcode + 0-2 bits for EVEX
+ regBits <<= 43;
+ return code ^ regBits;
+ }
return code ^ regBits;
}
@@ -3779,7 +4292,7 @@ inline unsigned emitter::insEncodeRegSIB(const instrDesc* id, regNumber reg, cod
}
if (false /*reg >= REG_R16 && reg <= REG_R31*/)
{
- // seperate the encoding for REX2.X3/X4, REX2.X3 will be handled in `AddRexXPrefix`.
+ // Separate the encoding for REX2.X3/X4, REX2.X3 will be handled in `AddRexXPrefix`.
assert(TakesRex2Prefix(id));
*code |= 0x002000000000ULL; // REX2.X4
}
@@ -4175,7 +4688,8 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id)
if ((code & 0xFF00) != 0)
{
- sz += IsSimdInstruction(ins) ? emitInsSize(id, code, includeRexPrefixSize) : 5;
+ sz += (IsSimdInstruction(ins) || TakesApxExtendedEvexPrefix(id)) ? emitInsSize(id, code, includeRexPrefixSize)
+ : 5;
}
else
{
@@ -4303,7 +4817,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code,
assert(emitComp->lvaTempsHaveLargerOffsetThanVars());
// Check whether we can use compressed displacement if EVEX.
- if (TakesEvexPrefix(id))
+ if (TakesEvexPrefix(id) || TakesApxExtendedEvexPrefix(id))
{
bool compressedFitsInByte = false;
TryEvexCompressDisp8Byte(id, ssize_t(offs), &compressedFitsInByte);
@@ -4347,7 +4861,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code,
#endif // !FEATURE_FIXED_OUT_ARGS
bool useSmallEncoding = false;
- if (TakesEvexPrefix(id))
+ if (TakesEvexPrefix(id) || TakesApxExtendedEvexPrefix(id))
{
TryEvexCompressDisp8Byte(id, ssize_t(offs), &useSmallEncoding);
}
@@ -4514,7 +5028,7 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
}
else
{
- if (TakesEvexPrefix(id))
+ if (TakesEvexPrefix(id) || TakesApxExtendedEvexPrefix(id))
{
dsp = TryEvexCompressDisp8Byte(id, dsp, &dspInByte);
}
@@ -5459,17 +5973,37 @@ void emitter::emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommo
// attr - the instruction operand size
// dst - the destination and first source operand
// src - the second source operand
+// targetReg - target register of this binary node (only used for APX-NDD form)
//
// Assumptions:
// i) caller of this routine needs to call genConsumeReg()
// ii) caller of this routine needs to call genProduceReg()
-regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
+regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src, regNumber targetReg)
{
// We can only have one memory operand and only src can be a constant operand
// However, the handling for a given operand type (mem, cns, or other) is fairly
// consistent regardless of whether they are src or dst. As such, we will find
// the type of each operand and only check them against src/dst where relevant.
+ const bool useNDD = UsePromotedEVEXEncoding() && (targetReg != REG_NA);
+#if !defined(TARGET_AMD64)
+    // APX is not supported on 32-bit targets.
+ assert(!useNDD);
+#else
+ if (useNDD)
+ {
+ assert(IsApxNDDEncodableInstruction(ins));
+ // targetReg has to be an actual register if using NDD.
+ assert(targetReg < REG_STK);
+        // Make sure the target register is not one of the source registers.
+ assert(dst->isUsedFromReg());
+ regNumber dstreg = dst->GetRegNum();
+ regNumber srcreg = src->isUsedFromReg() ? src->GetRegNum() : REG_NA;
+ assert(targetReg != dstreg);
+ assert(targetReg != srcreg);
+ }
+#endif
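+    // A minimal sketch of the NDD form assumed here: the destination is a third,
+    // distinct register, e.g.
+    //     add targetReg, dstOp, srcOp    ; targetReg = dstOp + srcOp
+    // versus the legacy two-operand read-modify-write sequence
+    //     mov targetReg, dstOp
+    //     add targetReg, srcOp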
+
GenTree* memOp = nullptr;
GenTree* cnsOp = nullptr;
GenTree* otherOp = nullptr;
@@ -5481,6 +6015,9 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
assert(dst->isUsedFromMemory() || (dst->GetRegNum() == REG_NA) || instrIs3opImul(ins));
assert(!src->isUsedFromMemory());
+ // APX code cannot hit this path.
+ assert(!useNDD);
+
memOp = dst;
if (src->isContained())
@@ -5588,6 +6125,9 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
assert(otherOp == nullptr);
assert(src->IsCnsIntOrI());
+ // APX code cannot hit this path.
+ assert(!useNDD);
+
id = emitNewInstrAmdCns(attr, memIndir->Offset(), (int)src->AsIntConCommon()->IconValue());
}
else
@@ -5605,6 +6145,13 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
assert(id != nullptr);
id->idIns(ins); // Set the instruction.
+ if (useNDD)
+ {
+ assert(memOp == src);
+ id->idReg1(targetReg);
+ id->idReg2(dst->GetRegNum());
+ id->idSetEvexNdContext();
+ }
// Determine the instruction format
insFormat fmt = IF_NONE;
@@ -5620,12 +6167,13 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
}
else
{
- fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+ fmt = useNDD ? emitInsModeFormat(ins, IF_RWR_RRD_ARD) : emitInsModeFormat(ins, IF_RRD_ARD);
}
}
else
{
assert(memOp == dst);
+ assert(!useNDD);
if (cnsOp != nullptr)
{
@@ -5664,6 +6212,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
else
{
assert(memOp == dst);
+ assert(!useNDD);
if (cnsOp != nullptr)
{
@@ -5686,7 +6235,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
dispIns(id);
emitCurIGsize += sz;
- return (memOp == src) ? dst->GetRegNum() : REG_NA;
+ return (memOp == src) ? (useNDD ? targetReg : dst->GetRegNum()) : REG_NA;
}
}
}
@@ -5734,15 +6283,24 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
}
else
{
- // src is a stack based local variable
- // dst is a register
- emitIns_R_S(ins, attr, dst->GetRegNum(), varNum, offset);
+ if (useNDD)
+ {
+ emitIns_R_R_S(ins, attr, targetReg, dst->GetRegNum(), varNum, offset, INS_OPTS_EVEX_nd);
+ return targetReg;
+ }
+ else
+ {
+ // src is a stack based local variable
+ // dst is a register
+ emitIns_R_S(ins, attr, dst->GetRegNum(), varNum, offset);
+ }
}
}
else
{
assert(memOp == dst);
assert((dst->GetRegNum() == REG_NA) || dst->IsRegOptional());
+ assert(!useNDD);
if (cnsOp != nullptr)
{
@@ -5774,10 +6332,20 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
{
assert(!dst->isContained());
GenTreeIntConCommon* intCns = src->AsIntConCommon();
- emitIns_R_I(ins, attr, dst->GetRegNum(), intCns->IconValue());
+
+ if (useNDD)
+ {
+ emitIns_R_R_I(ins, attr, targetReg, dst->GetRegNum(), (int)intCns->IconValue(), INS_OPTS_EVEX_nd);
+ return targetReg;
+ }
+ else
+ {
+ emitIns_R_I(ins, attr, dst->GetRegNum(), intCns->IconValue());
+ }
}
else
{
+ assert(!useNDD);
assert(src->IsCnsFltOrDbl());
GenTreeDblCon* dblCns = src->AsDblCon();
@@ -5796,7 +6364,15 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
}
else
{
- emitIns_R_R(ins, attr, dst->GetRegNum(), src->GetRegNum());
+ if (useNDD)
+ {
+ emitIns_R_R_R(ins, attr, targetReg, dst->GetRegNum(), src->GetRegNum(), INS_OPTS_EVEX_nd);
+ return targetReg;
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, dst->GetRegNum(), src->GetRegNum());
+ }
}
}
@@ -5947,7 +6523,7 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeI
* Add an instruction referencing a single register.
*/
-void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
+void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts instOptions /* = INS_OPTS_NONE */)
{
emitAttr size = EA_SIZE(attr);
@@ -6023,6 +6599,8 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
id->idInsFmt(fmt);
id->idReg1(reg);
+ SetEvexNfIfNeeded(id, instOptions);
+
// Vex bytes
sz += emitGetAdjustedSize(id, insEncodeMRreg(id, reg, attr, insCodeMR(ins)));
@@ -6095,10 +6673,11 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu
* Add an instruction referencing a register and a constant.
*/
-void emitter::emitIns_R_I(instruction ins,
- emitAttr attr,
- regNumber reg,
- ssize_t val DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
+void emitter::emitIns_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg,
+ ssize_t val,
+ insOpts instOptions DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags))
{
emitAttr size = EA_SIZE(attr);
@@ -6238,6 +6817,8 @@ void emitter::emitIns_R_I(instruction ins,
id->idDebugOnlyInfo()->idMemCookie = targetHandle;
#endif
+ SetEvexNfIfNeeded(id, instOptions);
+
if (isSimdInsAndValInByte)
{
bool includeRexPrefixSize = true;
@@ -6251,8 +6832,14 @@ void emitter::emitIns_R_I(instruction ins,
sz += emitInsSize(id, insCodeMI(ins), includeRexPrefixSize);
}
-
sz += emitGetAdjustedSize(id, insCodeMI(ins));
+#ifdef TARGET_AMD64
+ if (reg == REG_EAX && !instrIs3opImul(ins) && TakesApxExtendedEvexPrefix(id))
+ {
+ // ACC form is not promoted into EVEX space, need to emit with MI form.
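+        // e.g. "add eax, imm32" would otherwise use the short accumulator form
+        // (opcode 0x05, no ModRM); the MI form (0x81 /0) adds a ModRM byte.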
+ sz += 1;
+ }
+#endif // TARGET_AMD64
// Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
// 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
@@ -7026,6 +7613,14 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum
id->idReg1(reg1);
id->idReg2(reg2);
+ SetEvexNdIfNeeded(id, instOptions);
+ SetEvexNfIfNeeded(id, instOptions);
+
+ if (id->idIsEvexNdContextSet() && IsApxNDDEncodableInstruction(ins))
+ {
+ id->idInsFmt(IF_RWR_RRD);
+ }
+
if ((instOptions & INS_OPTS_EVEX_b_MASK) != INS_OPTS_NONE)
{
// if EVEX.b needs to be set in this path, then it should be embedded rounding.
@@ -7079,6 +7674,32 @@ void emitter::emitIns_R_R_I(
assert((instOptions & INS_OPTS_EVEX_b_MASK) == 0);
SetEvexEmbMaskIfNeeded(id, instOptions);
+ SetEvexNdIfNeeded(id, instOptions);
+
+ if (id->idIsEvexNdContextSet() && IsApxNDDEncodableInstruction(ins))
+ {
+        // Need to fix the instruction opcode for legacy instructions, as they have a different opcode for the RI form.
+ code = insCodeMI(ins);
+        // Need to fix the instruction format for NDD legacy instructions.
+ insFormat fmt;
+ switch (ins)
+ {
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ case INS_ror_N:
+ case INS_rol_N:
+ case INS_rcr_N:
+ case INS_rcl_N:
+ fmt = IF_RWR_RRD_SHF;
+ break;
+
+ default:
+ fmt = IF_RWR_RRD_CNS;
+ break;
+ }
+ id->idInsFmt(fmt);
+ }
UNATIVE_OFFSET sz = emitInsSizeRR(id, code, ival);
id->idCodeSize(sz);
@@ -7087,7 +7708,7 @@ void emitter::emitIns_R_R_I(
emitCurIGsize += sz;
}
-void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs)
+void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs, insOpts instOptions)
{
assert(ins == INS_prefetcht0 || ins == INS_prefetcht1 || ins == INS_prefetcht2 || ins == INS_prefetchnta ||
ins == INS_inc || ins == INS_dec);
@@ -7100,6 +7721,11 @@ void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int off
id->idAddr()->iiaAddrMode.amBaseReg = base;
id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ if ((instOptions & INS_OPTS_EVEX_NoApxPromotion) != 0)
+ {
+ id->idSetNoApxEvexPromotion();
+ }
+
UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
id->idCodeSize(sz);
@@ -7443,8 +8069,8 @@ void emitter::emitIns_R_R_C(instruction ins,
void emitter::emitIns_R_R_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, insOpts instOptions)
{
- assert(IsSimdInstruction(ins));
- assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins));
+ assert(IsSimdInstruction(ins) || IsApxExtendedEvexInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins) || IsApxExtendedEvexInstruction(ins));
instrDesc* id = emitNewInstr(attr);
id->idIns(ins);
@@ -7460,6 +8086,14 @@ void emitter::emitIns_R_R_R(
id->idSetEvexbContext(instOptions);
}
SetEvexEmbMaskIfNeeded(id, instOptions);
+ SetEvexNdIfNeeded(id, instOptions);
+ SetEvexNfIfNeeded(id, instOptions);
+
+ if (id->idIsEvexNdContextSet() && IsApxNDDEncodableInstruction(ins))
+ {
+        // Need to fix the instruction format for NDD legacy instructions.
+ id->idInsFmt(IF_RWR_RRD_RRD);
+ }
UNATIVE_OFFSET sz = emitInsSizeRR(id, insCodeRM(ins));
id->idCodeSize(sz);
@@ -7471,8 +8105,8 @@ void emitter::emitIns_R_R_R(
void emitter::emitIns_R_R_S(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, insOpts instOptions)
{
- assert(IsSimdInstruction(ins));
- assert(IsThreeOperandAVXInstruction(ins));
+ assert(IsSimdInstruction(ins) || IsApxExtendedEvexInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins) || IsApxExtendedEvexInstruction(ins));
instrDesc* id = emitNewInstr(attr);
@@ -7484,6 +8118,12 @@ void emitter::emitIns_R_R_S(
SetEvexBroadcastIfNeeded(id, instOptions);
SetEvexEmbMaskIfNeeded(id, instOptions);
+ SetEvexNdIfNeeded(id, instOptions);
+
+ if (id->idIsEvexNdContextSet() && IsApxNDDEncodableInstruction(ins))
+ {
+ id->idInsFmt(IF_RWR_RRD_SRD);
+ }
#ifdef DEBUG
id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
@@ -8156,7 +8796,7 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu
* The following adds instructions referencing address modes.
*/
-void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int disp)
+void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int disp, insOpts instOptions)
{
assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
@@ -8202,6 +8842,10 @@ void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber re
id->idAddr()->iiaAddrMode.amBaseReg = reg;
id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ if ((instOptions & INS_OPTS_EVEX_NoApxPromotion) != 0)
+ {
+ id->idSetNoApxEvexPromotion();
+ }
assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
@@ -8310,9 +8954,10 @@ void emitter::emitIns_R_AI(instruction ins,
emitCurIGsize += sz;
}
-void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber reg, regNumber base, cnsval_ssize_t disp)
+void emitter::emitIns_AR_R(
+ instruction ins, emitAttr attr, regNumber reg, regNumber base, cnsval_ssize_t disp, insOpts instOptions)
{
- emitIns_ARX_R(ins, attr, reg, base, REG_NA, 1, disp);
+ emitIns_ARX_R(ins, attr, reg, base, REG_NA, 1, disp, instOptions);
}
//------------------------------------------------------------------------
@@ -8595,8 +9240,14 @@ void emitter::emitIns_R_ARX(
emitCurIGsize += sz;
}
-void emitter::emitIns_ARX_R(
- instruction ins, emitAttr attr, regNumber reg, regNumber base, regNumber index, unsigned scale, cnsval_ssize_t disp)
+void emitter::emitIns_ARX_R(instruction ins,
+ emitAttr attr,
+ regNumber reg,
+ regNumber base,
+ regNumber index,
+ unsigned scale,
+ cnsval_ssize_t disp,
+ insOpts instOptions)
{
UNATIVE_OFFSET sz;
instrDesc* id = emitNewInstrAmd(attr, disp);
@@ -8622,6 +9273,10 @@ void emitter::emitIns_ARX_R(
id->idAddr()->iiaAddrMode.amBaseReg = base;
id->idAddr()->iiaAddrMode.amIndxReg = index;
id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(scale);
+ if ((instOptions & INS_OPTS_EVEX_NoApxPromotion) != 0)
+ {
+ id->idSetNoApxEvexPromotion();
+ }
assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
@@ -9606,6 +10261,74 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
emitAdjustStackDepthPushPop(ins);
}
+void emitter::emitIns_BASE_R_R(instruction ins, emitAttr attr, regNumber op1Reg, regNumber op2Reg)
+{
+ if (DoJitUseApxNDD(ins) && (op1Reg != op2Reg))
+ {
+        // If APX-EVEX-NDD is available and needed, emit the operation in NDD form:
+ // ins dst, src
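+        // e.g. assuming ins = INS_neg, this emits "neg op1Reg, op2Reg", writing
+        // the negation of op2Reg into op1Reg with no separate mov.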
+ emitIns_R_R(ins, attr, op1Reg, op2Reg, INS_OPTS_EVEX_nd);
+ }
+ else
+ {
+ // mov dst, src
+ // ins dst
+ emitIns_Mov(INS_mov, attr, op1Reg, op2Reg, /*canSkip*/ true);
+ emitIns_R(ins, attr, op1Reg);
+ }
+}
+
+void emitter::emitIns_BASE_R_R_I(instruction ins, emitAttr attr, regNumber op1Reg, regNumber op2Reg, int ival)
+{
+ if (DoJitUseApxNDD(ins) && (op1Reg != op2Reg))
+ {
+        // If APX-EVEX-NDD is available and needed, emit the operation in NDD form:
+ // ins dst, src, cns
+ if (IsShiftInstruction(ins) && ival == 1)
+ {
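+            // The shift-by-1 form uses the short opcode with an implicit count
+            // (the x86 D0/D1 family), so no immediate operand is emitted.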
+ emitIns_R_R(ins, attr, op1Reg, op2Reg, INS_OPTS_EVEX_nd);
+ }
+ else
+ {
+ emitIns_R_R_I(ins, attr, op1Reg, op2Reg, ival, INS_OPTS_EVEX_nd);
+ }
+ }
+ else
+ {
+ // mov dst, src
+ // ins dst, cns
+ emitIns_Mov(INS_mov, attr, op1Reg, op2Reg, /*canSkip*/ true);
+ if (IsShiftInstruction(ins) && ival == 1)
+ {
+ emitIns_R(ins, attr, op1Reg);
+ }
+ else
+ {
+ emitIns_R_I(ins, attr, op1Reg, ival);
+ }
+ }
+}
+
+regNumber emitter::emitIns_BASE_R_R_RM(
+ instruction ins, emitAttr attr, regNumber targetReg, GenTree* treeNode, GenTree* regOp, GenTree* rmOp)
+{
+ bool requiresOverflowCheck = treeNode->gtOverflowEx();
+ regNumber r = REG_NA;
+ assert(regOp->isUsedFromReg());
+
+ if (DoJitUseApxNDD(ins) && regOp->GetRegNum() != targetReg)
+ {
+ r = emitInsBinary(ins, attr, regOp, rmOp, targetReg);
+ }
+ else
+ {
+ emitIns_Mov(INS_mov, attr, targetReg, regOp->GetRegNum(), /*canSkip*/ true);
+ r = emitInsBinary(ins, attr, treeNode, rmOp);
+ }
+
+ return r;
+}
+
//----------------------------------------------------------------------------------------
// IsRedundantStackMov:
// Check if the current `mov` instruction is redundant and can be omitted when dealing with Load/Store from stack.
@@ -9743,6 +10466,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int va
SetEvexBroadcastIfNeeded(id, instOptions);
SetEvexEmbMaskIfNeeded(id, instOptions);
+ SetEvexNfIfNeeded(id, instOptions);
UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
id->idCodeSize(sz);
@@ -11447,6 +12171,13 @@ void emitter::emitDispEmbRounding(instrDesc* id) const
{
return;
}
+
+ if (IsApxExtendedEvexInstruction(id->idIns()))
+ {
+        // APX-EVEX.nd shares the same bit(s) with EVEX.b;
+        // for the NDD case, we don't need to display anything special.
+ return;
+ }
assert(!id->idHasMem());
unsigned roundingMode = id->idGetEvexbContext();
if (roundingMode == 1)
@@ -11627,6 +12358,14 @@ void emitter::emitDispIns(
/* Display the instruction name */
+#ifdef TARGET_AMD64
+ if (IsApxNFEncodableInstruction(id->idIns()) && id->idIsEvexNfContextSet())
+ {
+        // Print the EVEX.NF indication in pseudo-prefix style.
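+        // e.g. a flag-suppressing add is displayed as "{nf} add ..." rather
+        // than as a distinct mnemonic.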
+ printf("{nf} ");
+ }
+#endif // TARGET_AMD64
+
sstr = codeGen->genInsDisplayName(id);
printf(" %-9s", sstr);
@@ -12377,6 +13116,20 @@ void emitter::emitDispIns(
break;
}
+ case INS_rol:
+ case INS_ror:
+ case INS_rcl:
+ case INS_rcr:
+ case INS_shl:
+ case INS_shr:
+ case INS_sar:
+ {
+ printf("%s", emitRegName(id->idReg1(), attr));
+ printf(", %s", emitRegName(id->idReg2(), attr));
+ emitDispShift(ins, (BYTE)0);
+ break;
+ }
+
default:
{
printf("%s", emitRegName(id->idReg1(), attr));
@@ -12394,8 +13147,8 @@ void emitter::emitDispIns(
case IF_RRW_RRD_RRD:
case IF_RWR_RWR_RRD:
{
- assert(IsVexOrEvexEncodableInstruction(ins));
- assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins));
+ assert(IsVexOrEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins) || IsApxExtendedEvexInstruction(ins));
regNumber reg2 = id->idReg2();
regNumber reg3 = id->idReg3();
@@ -12630,6 +13383,19 @@ void emitter::emitDispIns(
break;
}
+ case IF_RWR_RRD_SHF:
+ {
+ assert(IsApxExtendedEvexInstruction(id->idIns()));
+ printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
+
+ emitGetInsCns(id, &cnsVal);
+ val = cnsVal.cnsVal;
+
+ emitDispShift(ins, (BYTE)val);
+
+ break;
+ }
+
case IF_RRD_MRD:
case IF_RWR_MRD:
case IF_RRW_MRD:
@@ -13578,12 +14344,21 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
break;
case EA_2BYTE:
-
- /* Output a size prefix for a 16-bit operand */
-
- dst += emitOutputByte(dst, 0x66);
-
+ {
+ // Output a size prefix for a 16-bit operand
+ if (TakesApxExtendedEvexPrefix(id))
+ {
+ assert(IsApxExtendedEvexInstruction(ins));
+ assert(hasEvexPrefix(code));
+ // Evex.pp should already be added when adding the prefix.
+ assert((code & EXTENDED_EVEX_PP_BITS) != 0);
+ }
+ else
+ {
+ dst += emitOutputByte(dst, 0x66);
+ }
FALLTHROUGH;
+ }
case EA_4BYTE:
#ifdef TARGET_AMD64
@@ -13627,7 +14402,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
}
else
{
- if (TakesEvexPrefix(id))
+ if (TakesEvexPrefix(id) || TakesApxExtendedEvexPrefix(id))
{
dsp = TryEvexCompressDisp8Byte(id, dsp, &dspInByte);
}
@@ -14165,6 +14940,14 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
break;
+ case IF_RWR_RRD_ARD:
+ assert(((id->idGCref() == GCT_BYREF) &&
+ (ins == INS_add || ins == INS_sub || ins == INS_sub_hide || insIsCMOV(ins))) ||
+ ((id->idGCref() == GCT_GCREF) && insIsCMOV(ins)));
+ assert(id->idIsEvexNdContextSet());
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
case IF_ARD_RRD:
case IF_AWR_RRD:
break;
@@ -14411,25 +15194,45 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
switch (size)
{
case EA_1BYTE:
+#ifdef TARGET_AMD64
+ assert((ins != INS_lzcnt_apx) && (ins != INS_tzcnt_apx) && (ins != INS_popcnt_apx));
+#endif // TARGET_AMD64
break;
case EA_2BYTE:
// Output a size prefix for a 16-bit operand
- dst += emitOutputByte(dst, 0x66);
+ {
+ if (!TakesApxExtendedEvexPrefix(id))
+ {
+ dst += emitOutputByte(dst, 0x66);
+ }
+ }
FALLTHROUGH;
case EA_4BYTE:
+ code |= 0x01;
+ break;
+
#ifdef TARGET_AMD64
case EA_8BYTE:
-#endif // TARGET_AMD64
-
/* Set the 'w' size bit to indicate 32-bit operation
* Note that incrementing "code" for INS_call (0xFF) would
* overflow, whereas setting the lower bit to 1 just works out
*/
-
- code |= 0x01;
- break;
+ {
+ if (TakesApxExtendedEvexPrefix(id))
+ {
+ assert(hasEvexPrefix(code));
+ code = AddRexWPrefix(id, code);
+ }
+            // These instructions do not support 1-byte inputs and the opcode is exact.
+            if ((ins != INS_lzcnt_apx) && (ins != INS_tzcnt_apx) && (ins != INS_popcnt_apx))
+            {
+ code |= 0x01;
+ }
+ break;
+ }
+#endif // TARGET_AMD64
#ifdef TARGET_X86
case EA_8BYTE:
@@ -14463,7 +15266,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
// function, to which the remainder of the emitter logic should handle properly.
// TODO-XARCH-AVX512 : embedded broadcast might change this
int dspAsByte = dsp;
- if (TakesEvexPrefix(id))
+ if (TakesEvexPrefix(id) || TakesApxExtendedEvexPrefix(id))
{
dspAsByte = int(TryEvexCompressDisp8Byte(id, ssize_t(dsp), &dspInByte));
}
@@ -14517,7 +15320,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
// TODO-XARCH-AVX512 : working to wrap up all adjusted disp8 compression logic into the following
// function, to which the remainder of the emitter logic should handle properly.
// TODO-XARCH-AVX512 : embedded broadcast might change this
- if (TakesEvexPrefix(id))
+ if (TakesEvexPrefix(id) || TakesApxExtendedEvexPrefix(id))
{
dspAsByte = int(TryEvexCompressDisp8Byte(id, ssize_t(dsp), &dspInByte));
}
@@ -14664,6 +15467,15 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
break;
+ case IF_RWR_RRD_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
+
+ // reg could have been a GCREF as GCREF + int=BYREF
+ // or BYREF+/-int=BYREF
+ assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide));
+ assert(id->idIsEvexNdContextSet());
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
case IF_SRW_CNS:
case IF_SRW_RRD:
case IF_SRW_RRW:
@@ -15249,7 +16061,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
// Can't use the compact form, use the long form
ins = (instruction)(ins + 1);
- if (size == EA_2BYTE)
+ if (size == EA_2BYTE && !TakesApxExtendedEvexPrefix(id))
{
// Output a size prefix for a 16-bit operand
dst += emitOutputByte(dst, 0x66);
@@ -15262,10 +16074,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
code |= 0x1;
}
- if (TakesRex2Prefix(id))
- {
- code = AddRex2Prefix(ins, code);
- }
+ code = AddX86PrefixIfNeeded(id, code, size);
if (TakesRexWPrefix(id))
{
@@ -15400,23 +16209,22 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
default:
assert(id->idGCref() == GCT_NONE);
-
- code = insEncodeMRreg(id, reg, size, insCodeMR(ins));
+ code = insCodeMR(ins);
+ code = AddX86PrefixIfNeeded(id, code, size);
+ code = insEncodeMRreg(id, reg, size, code);
if (size != EA_1BYTE)
{
// Set the 'w' bit to get the large version
code |= 0x1;
- if (size == EA_2BYTE)
+ if (size == EA_2BYTE && !TakesApxExtendedEvexPrefix(id))
{
// Output a size prefix for a 16-bit operand
dst += emitOutputByte(dst, 0x66);
}
}
- code = AddX86PrefixIfNeeded(id, code, size);
-
if (TakesRexWPrefix(id))
{
code = AddRexWPrefix(id, code);
@@ -15553,7 +16361,11 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
#ifdef FEATURE_HW_INTRINSICS
else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) ||
- (ins == INS_tzcnt))
+ (ins == INS_tzcnt)
+#ifdef TARGET_AMD64
+ || (ins == INS_lzcnt_apx) || (ins == INS_tzcnt_apx) || (ins == INS_popcnt_apx)
+#endif // TARGET_AMD64
+ )
{
assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
code = insCodeRM(ins);
@@ -15564,7 +16376,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
code |= 0x0100;
}
- if (size == EA_2BYTE)
+ if (size == EA_2BYTE && !TakesApxExtendedEvexPrefix(id))
{
assert(ins == INS_crc32);
dst += emitOutputByte(dst, 0x66);
@@ -15577,15 +16389,21 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
#endif // FEATURE_HW_INTRINSICS
else
{
+ // TODO-XArch-APX:
+        // Some instructions with an NDD form might take this path with an EVEX prefix;
+        // consider having a separate path with checks like TakesApxExtendedEvexPrefix.
+        // Essentially, we need to make the priority and necessity of REX2 and EVEX clear:
+        // REX2 is needed iff EGPRs are involved.
+        // EVEX is needed when NDD, NF or other features are involved.
+        // So the logic should be: check whether those new features are used, then check
+        // whether EGPRs are involved. EGPRs are supported by EVEX anyway, so they don't
+        // need to be checked in the first place.
assert(!TakesSimdPrefix(id));
code = insCodeMR(ins);
- if (TakesRex2Prefix(id))
- {
- code = AddRex2Prefix(ins, code);
- }
+ code = AddX86PrefixIfNeeded(id, code, size);
code = insEncodeMRreg(id, code);
- if (ins != INS_test)
+ if (ins != INS_test && !IsShiftInstruction(ins))
{
code |= 2;
}
@@ -15599,7 +16417,17 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
case EA_2BYTE:
// Output a size prefix for a 16-bit operand
- dst += emitOutputByte(dst, 0x66);
+ if (TakesApxExtendedEvexPrefix(id))
+ {
+ assert(IsApxExtendedEvexInstruction(ins));
+ assert(hasEvexPrefix(code));
+ // Evex.pp should already be added when adding the prefix.
+ assert((code & EXTENDED_EVEX_PP_BITS) != 0);
+ }
+ else
+ {
+ dst += emitOutputByte(dst, 0x66);
+ }
FALLTHROUGH;
case EA_4BYTE:
@@ -15650,8 +16478,18 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
}
- unsigned regCode = insEncodeReg345(id, regFor345Bits, size, &code);
- regCode |= insEncodeReg012(id, regFor012Bits, size, &code);
+ unsigned regCode;
+ if (!id->idIsEvexNdContextSet() || !IsApxNDDEncodableInstruction(ins))
+ {
+ regCode = insEncodeReg345(id, regFor345Bits, size, &code);
+ regCode |= insEncodeReg012(id, regFor012Bits, size, &code);
+ }
+ else
+ {
+        // A unary ins in NDD form uses EVEX.vvvvv for dst and ModRM.rm for src.
+ code = insEncodeReg3456(id, reg1, size, code);
+ regCode = insEncodeReg012(id, reg2, size, &code);
+ }
if (TakesSimdPrefix(id))
{
@@ -15709,6 +16547,11 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
dst += emitOutputByte(dst, (code >> 8) & 0xFF);
dst += emitOutputByte(dst, (0xC0 | regCode));
}
+ else if (IsApxNDDEncodableInstruction(ins) && id->idIsEvexNdContextSet())
+ {
+ dst += emitOutputByte(dst, (code & 0xFF));
+ dst += emitOutputByte(dst, (0xC0 | regCode | (code >> 8)));
+ }
else
{
dst += emitOutputWord(dst, code);
@@ -15718,155 +16561,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// Does this instruction operate on a GC ref value?
if (id->idGCref())
{
- switch (id->idInsFmt())
- {
- case IF_RRD_RRD:
- break;
-
- case IF_RWR_RRD:
- {
- if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
- {
- // We're relocating "this" in the prolog
- assert(emitComp->lvaIsOriginalThisArg(0));
- assert(emitComp->lvaTable[0].lvRegister);
- assert(emitComp->lvaTable[0].GetRegNum() == reg1);
-
- if (emitFullGCinfo)
- {
- emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
- break;
- }
- else
- {
- /* If emitFullGCinfo==false, the we don't use any
- regPtrDsc's and so explicitly note the location
- of "this" in GCEncode.cpp
- */
- }
- }
-
- emitGCregLiveUpd(id->idGCref(), reg1, dst);
- break;
- }
-
- case IF_RRW_RRD:
- {
- switch (id->idIns())
- {
- /*
- This must be one of the following cases:
-
- xor reg, reg to assign NULL
-
- and r1 , r2 if (ptr1 && ptr2) ...
- or r1 , r2 if (ptr1 || ptr2) ...
-
- add r1 , r2 to compute a normal byref
- sub r1 , r2 to compute a strange byref (VC only)
-
- */
- case INS_xor:
- assert(reg1 == reg2);
- emitGCregLiveUpd(id->idGCref(), reg1, dst);
- break;
-
- case INS_or:
- case INS_and:
- emitGCregDeadUpd(reg1, dst);
- break;
-
- case INS_add:
- case INS_sub:
- case INS_sub_hide:
- assert(id->idGCref() == GCT_BYREF);
-
-#if 0
-#ifdef DEBUG
- // Due to elided register moves, we can't have the following assert.
- // For example, consider:
- // t85 = LCL_VAR byref V01 arg1 rdx (last use) REG rdx
- // /--* t85 byref
- // * STORE_LCL_VAR byref V40 tmp31 rdx REG rdx
- // Here, V01 is type `long` on entry, then is stored as a byref. But because
- // the register allocator assigned the same register, no instruction was
- // generated, and we only (currently) make gcref/byref changes in emitter GC info
- // when an instruction is generated. We still generate correct GC info, as this
- // instruction, if writing a GC ref even through reading a long, will go live here.
- // These situations typically occur due to unsafe casting, such as with Span.
-
- regMaskTP regMask;
- regMask = genRegMask(reg1) | genRegMask(reg2);
-
- // r1/r2 could have been a GCREF as GCREF + int=BYREF
- // or BYREF+/-int=BYREF
- assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
- ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide)));
-#endif // DEBUG
-#endif // 0
-
- // Mark r1 as holding a byref
- emitGCregLiveUpd(GCT_BYREF, reg1, dst);
- break;
-
- default:
-#ifdef DEBUG
- emitDispIns(id, false, false, false);
-#endif
- assert(!"unexpected GC reg update instruction");
- }
-
- break;
- }
-
- case IF_RRW_RRW:
- {
- // This must be "xchg reg1, reg2"
- assert(id->idIns() == INS_xchg);
-
- // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
- // register pointer mask.
-
- GCtype gc1, gc2;
-
- gc1 = emitRegGCtype(reg1);
- gc2 = emitRegGCtype(reg2);
-
- if (gc1 != gc2)
- {
- // Kill the GC-info about the GC registers
-
- if (needsGC(gc1))
- {
- emitGCregDeadUpd(reg1, dst);
- }
-
- if (needsGC(gc2))
- {
- emitGCregDeadUpd(reg2, dst);
- }
-
- // Now, swap the info
-
- if (needsGC(gc1))
- {
- emitGCregLiveUpd(gc1, reg2, dst);
- }
-
- if (needsGC(gc2))
- {
- emitGCregLiveUpd(gc2, reg1, dst);
- }
- }
- break;
- }
-
- default:
-#ifdef DEBUG
- emitDispIns(id, false, false, false);
-#endif
- assert(!"unexpected GC ref instruction format");
- }
+ emitHandleGCrefRegs(dst, id);
}
else
{
@@ -15911,8 +16606,9 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
code_t code;
instruction ins = id->idIns();
- assert(IsVexOrEvexEncodableInstruction(ins));
- assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins) || isAvx512Blendv(ins) || IsKInstruction(ins));
+ assert(IsVexOrEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins) || isAvx512Blendv(ins) || IsKInstruction(ins) ||
+ IsApxExtendedEvexInstruction(ins));
regNumber targetReg = id->idReg1();
regNumber src1 = id->idReg2();
regNumber src2 = id->idReg3();
@@ -15921,6 +16617,51 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
code = insCodeRM(ins);
code = AddX86PrefixIfNeeded(id, code, size);
+ if (IsApxExtendedEvexInstruction(ins) && !IsBMIInstruction(ins))
+ {
+ // TODO-XArch-apx:
+ // For rm-like operand encoding instructions:
+        // legacy promoted EVEX encoding introduces different operand semantics:
+ // op1 - vvvvv
+ // op2 - MODRM.REG
+ // op3 - MODRM.R/M
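+        // e.g. for an NDD "sub dst, src1, src2": dst goes into EVEX.vvvvv while
+        // ModRM encodes the two sources, so swap the locals here to reuse the
+        // reg345/reg012 emission below.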
+ regNumber tmp = src1;
+ src1 = targetReg;
+ targetReg = tmp;
+
+ switch (size)
+ {
+ case EA_1BYTE:
+                // TODO-APX : verify we should never end up here. At least for the instructions
+                // examined so far, we promote to int to do the operation.
+ noway_assert(RBM_BYTE_REGS & genRegMask(src1));
+ noway_assert(RBM_BYTE_REGS & genRegMask(src2));
+ noway_assert(RBM_BYTE_REGS & genRegMask(targetReg));
+ break;
+
+ case EA_2BYTE:
+ case EA_4BYTE:
+ // Set the 'w' bit to get the large version
+ code = insIsCMOV(ins) ? code : (code | (0x01));
+ break;
+
+#ifdef TARGET_AMD64
+ case EA_8BYTE:
+ // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
+ // Don't need to zero out the high bits explicitly
+ code = AddRexWPrefix(id, code); // TODO-APX : Revisit. does xor or other cases need to be handled
+ // differently? see emitOutputRR
+ // Set the 'w' bit to get the large version
+ code = insIsCMOV(ins) ? code : (code | (0x01));
+ break;
+
+#endif // TARGET_AMD64
+
+ default:
+ assert(!"unexpected size");
+ }
+ }
+
code = insEncodeRMreg(id, code);
if (TakesRexWPrefix(id))
@@ -15968,7 +16709,10 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
dst += emitOutputByte(dst, (0xC0 | regCode));
}
- noway_assert(!id->idGCref());
+ if (id->idGCref())
+ {
+ emitHandleGCrefRegs(dst, id);
+ }
if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
{
@@ -16151,6 +16895,12 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
useACC = true;
}
}
+
+ if (TakesApxExtendedEvexPrefix(id))
+ {
+ // ACC form does not have support for promoted EVEX.
+ useACC = false;
+ }
}
else
{
@@ -16206,7 +16956,10 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
case EA_2BYTE:
// Output a size prefix for a 16-bit operand
- dst += emitOutputByte(dst, 0x66);
+ if (!TakesApxExtendedEvexPrefix(id))
+ {
+ dst += emitOutputByte(dst, 0x66);
+ }
FALLTHROUGH;
case EA_4BYTE:
@@ -16884,7 +17637,23 @@ ssize_t emitter::GetInputSizeInBytes(instrDesc* id) const
//
ssize_t emitter::TryEvexCompressDisp8Byte(instrDesc* id, ssize_t dsp, bool* dspInByte)
{
- assert(TakesEvexPrefix(id));
+ assert(TakesEvexPrefix(id) || TakesApxExtendedEvexPrefix(id));
+
+ if (!hasTupleTypeInfo(id->idIns()))
+ {
+        // After APX, some instructions with APX features will be promoted
+        // to APX-EVEX. We reuse the existing displacement-emitting path,
+        // but for instructions with no tuple information, APX-EVEX always
+        // treats the scaling factor as 1.
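+        // e.g. a displacement of +64 is emitted directly as a disp8 here, rather
+        // than being divided by the operand's tuple size as is done below.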
+ instruction ins = id->idIns();
+ // TODO-XArch-APX:
+        // This assert may need tweaking if BMI1 instructions are promoted
+        // into EVEX for multiple features; currently only EVEX.NF is handled.
+ assert(IsApxExtendedEvexInstruction(id->idIns()));
+ *dspInByte = ((signed char)dsp == (ssize_t)dsp);
+ return dsp;
+ }
+
insTupleType tt = insTupleTypeInfo(id->idIns());
assert(hasTupleTypeInfo(id->idIns()));
@@ -17539,7 +18308,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
}
// Output a size prefix for a 16-bit operand
- if (size == EA_2BYTE)
+ if (size == EA_2BYTE && !TakesApxExtendedEvexPrefix(id))
{
dst += emitOutputByte(dst, 0x66);
}
@@ -17555,6 +18324,37 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
break;
}
+ case IF_RWR_RRD_SHF:
+ {
+ assert(IsApxExtendedEvexInstruction(ins));
+ code = insCodeMR(ins);
+ code = AddX86PrefixIfNeeded(id, code, size);
+ code = insEncodeMRreg(id, id->idReg2(), size, code);
+ code = insEncodeReg3456(id, id->idReg1(), size, code);
+
+ // set the W bit
+ if (size != EA_1BYTE)
+ {
+ code |= 1;
+ }
+
+ // Emit the REX prefix if it exists
+ if (TakesRexWPrefix(id))
+ {
+ code = AddRexWPrefix(id, code);
+ }
+
+ dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code);
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, emitGetInsSC(id));
+ sz = emitSizeOfInsDsc_CNS(id);
+
+ // Update GC info.
+ assert(!id->idGCref());
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+ }
+
case IF_RRD_RRD:
case IF_RWR_RRD:
case IF_RRW_RRD:
@@ -17628,7 +18428,105 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
// Also, determine which operand goes where in the ModRM byte.
regNumber mReg;
regNumber rReg;
- if (hasCodeMR(ins))
+ if (IsApxExtendedEvexInstruction(ins))
+ {
+ assert(hasCodeMI(ins));
+ code = insCodeMI(ins);
+ code = AddX86PrefixIfNeeded(id, code, size);
+ code = insEncodeReg3456(id, id->idReg1(), size, code);
+ mReg = id->idReg2();
+ code = insEncodeMIreg(id, mReg, size, code);
+ rReg = REG_NA;
+ ssize_t val = emitGetInsSC(id);
+ bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test);
+
+ switch (size)
+ {
+ case EA_1BYTE:
+ break;
+
+ case EA_2BYTE:
+ code |= EXTENDED_EVEX_PP_BITS;
+ FALLTHROUGH;
+
+ case EA_4BYTE:
+ code |= 1;
+ break;
+
+#ifdef TARGET_AMD64
+ case EA_8BYTE:
+ code = AddRexWPrefix(id, code);
+ code |= 1;
+ break;
+#endif // TARGET_AMD64
+
+ default:
+ assert(!"unexpected size");
+ }
+
+ dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code);
+
+ if (valInByte && size > EA_1BYTE)
+ {
+ code |= 2;
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, val);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code);
+ switch (size)
+ {
+ case EA_1BYTE:
+ dst += emitOutputByte(dst, val);
+ break;
+ case EA_2BYTE:
+ dst += emitOutputWord(dst, val);
+ break;
+ case EA_4BYTE:
+ dst += emitOutputLong(dst, val);
+ break;
+#ifdef TARGET_AMD64
+ case EA_8BYTE:
+ dst += emitOutputLong(dst, val);
+ break;
+#endif // TARGET_AMD64
+ default:
+ break;
+ }
+
+ if (id->idIsCnsReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
+ assert(size == EA_4BYTE);
+ }
+ }
+
+ sz = emitSizeOfInsDsc_CNS(id);
+
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_RRD_CNS:
+ assert(!instrIs3opImul(ins));
+
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+
+ break;
+ }
+ else if (hasCodeMR(ins))
{
code = insCodeMR(ins);
// Emit the VEX prefix if it exists
@@ -17863,6 +18761,23 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
{
code = insCodeRM(ins);
+ if (id->idIsEvexNdContextSet() && TakesApxExtendedEvexPrefix(id))
+ {
+ // TODO-XArch-APX:
+            // It is unclear why instructions on this path can have an instruction
+            // format other than IF_RWR_RRD_ARD; fixed up here for debugging purposes
+            // only, needs revisiting.
+ id->idInsFmt(IF_RWR_RRD_ARD);
+
+ code = AddX86PrefixIfNeeded(id, code, size);
+ code = insEncodeReg3456(id, id->idReg1(), size, code);
+ regcode = (insEncodeReg345(id, id->idReg2(), size, &code) << 8);
+ dst = emitOutputAM(dst, id, code | regcode);
+
+ sz = emitSizeOfInsDsc_AMD(id);
+ break;
+ }
+
if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32))
{
// Special case 4-byte AVX instructions as the
@@ -18130,7 +19045,19 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case IF_RRW_RRD_SRD:
case IF_RWR_RWR_SRD:
{
- assert(IsVexOrEvexEncodableInstruction(ins));
+ assert(IsVexOrEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins));
+
+ if (id->idIsEvexNdContextSet() && IsApxNDDEncodableInstruction(ins))
+ {
+ // EVEX.vvvv has different semantic for APX-EVEX NDD instructions.
+ code = insCodeRM(ins);
+ code = AddX86PrefixIfNeeded(id, code, size);
+ code = insEncodeReg3456(id, id->idReg1(), size, code);
+ regcode = (insEncodeReg345(id, id->idReg2(), size, &code) << 8);
+ dst = emitOutputSV(dst, id, code | regcode);
+ sz = sizeof(instrDesc);
+ break;
+ }
code = insCodeRM(ins);
code = AddX86PrefixIfNeeded(id, code, size);
@@ -19195,6 +20122,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;
case IF_RRW:
+ // TODO-XArch-APX: to be verified if this data is correct for NDD form.
+ case IF_RWR_RRD:
// ins reg, cl
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency = PERFSCORE_LATENCY_2C;
@@ -19222,6 +20151,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
switch (insFmt)
{
case IF_RRW:
+ // TODO-XArch-APX: to be verified if this data is correct for NDD form.
+ case IF_RWR_RRD:
// ins reg, 1
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
break;
@@ -19255,6 +20186,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
switch (insFmt)
{
case IF_RRW_SHF:
+ case IF_RWR_RRD_SHF:
// ins reg, cns
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
break;
@@ -20225,6 +21157,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vshuff64x2:
case INS_vshufi32x4:
case INS_vshufi64x2:
+#ifdef TARGET_AMD64
+ case INS_popcnt_apx:
+ case INS_lzcnt_apx:
+ case INS_tzcnt_apx:
+#endif // TARGET_AMD64
{
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_3C;
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index 5f820c7c022c20..8e149ed5be3389 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -134,11 +134,18 @@ static regNumber getSseShiftRegNumber(instruction ins);
bool HasVexEncoding(instruction ins) const;
bool HasEvexEncoding(instruction ins) const;
bool HasRex2Encoding(instruction ins) const;
+bool HasApxNdd(instruction ins) const;
+bool HasApxNf(instruction ins) const;
bool IsVexEncodableInstruction(instruction ins) const;
bool IsEvexEncodableInstruction(instruction ins) const;
bool IsRex2EncodableInstruction(instruction ins) const;
+bool IsApxNDDEncodableInstruction(instruction ins) const;
+bool IsApxNFEncodableInstruction(instruction ins) const;
+bool IsApxExtendedEvexInstruction(instruction ins) const;
+bool IsShiftInstruction(instruction ins) const;
bool IsLegacyMap1(code_t code) const;
bool IsVexOrEvexEncodableInstruction(instruction ins) const;
+bool DoJitUseApxNDD(instruction ins) const;
code_t insEncodeMIreg(const instrDesc* id, regNumber reg, emitAttr size, code_t code);
@@ -179,6 +186,8 @@ bool AreFlagsSetForSignJumpOpt(regNumber reg, emitAttr opSize, GenCondition cond
insOpts GetEmbRoundingMode(uint8_t mode) const;
+void emitHandleGCrefRegs(BYTE* dst, instrDesc* id);
+
bool hasRexPrefix(code_t code)
{
#ifdef TARGET_AMD64
@@ -332,6 +341,18 @@ void SetUseRex2Encoding(bool value)
useRex2Encodings = value;
}
+// Is Promoted EVEX encoding supported.
+bool usePromotedEVEXEncodings;
+bool UsePromotedEVEXEncoding() const
+{
+ return usePromotedEVEXEncodings;
+}
+
+void SetUsePromotedEVEXEncoding(bool value)
+{
+ usePromotedEVEXEncodings = value;
+}
+
//------------------------------------------------------------------------
// UseSimdEncoding: Returns true if either VEX or EVEX encoding is supported
// contains Evex prefix.
@@ -349,6 +370,7 @@ bool UseSimdEncoding() const
#define EVEX_PREFIX_CODE 0x6200000000000000ULL
bool TakesEvexPrefix(const instrDesc* id) const;
+bool TakesApxExtendedEvexPrefix(const instrDesc* id) const;
//------------------------------------------------------------------------
// hasEvexPrefix: Returns true if the instruction encoding already
@@ -405,11 +427,7 @@ code_t AddSimdPrefixIfNeeded(const instrDesc* id, code_t code, emitAttr size)
//
code_t AddX86PrefixIfNeeded(const instrDesc* id, code_t code, emitAttr size)
{
- // TODO-xarch-apx:
- // consider refactor this part with AddSimdPrefixIfNeeded as a lot of functionality
- // of these functions are overlapping.
-
- if (TakesEvexPrefix(id))
+ if (TakesEvexPrefix(id) || TakesApxExtendedEvexPrefix(id))
{
return AddEvexPrefix(id, code, size);
}
@@ -445,7 +463,7 @@ code_t AddX86PrefixIfNeededAndNotPresent(const instrDesc* id, code_t code, emitA
// consider refactor this part with AddSimdPrefixIfNeeded as a lot of functionality
// of these functions are overlapping.
- if (TakesEvexPrefix(id))
+ if (TakesEvexPrefix(id) || TakesApxExtendedEvexPrefix(id))
{
return !hasEvexPrefix(code) ? AddEvexPrefix(id, code, size) : code;
}
@@ -511,6 +529,48 @@ void SetEvexEmbMaskIfNeeded(instrDesc* id, insOpts instOptions)
}
}
+//------------------------------------------------------------------------
+// SetEvexNdIfNeeded: set EVEX.ND (new data destination) form if needed.
+//
+// Arguments:
+// id - instruction descriptor
+// instOptions - emit options
+//
+void SetEvexNdIfNeeded(instrDesc* id, insOpts instOptions)
+{
+ if ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0)
+ {
+ assert(UsePromotedEVEXEncoding());
+ assert(IsApxNDDEncodableInstruction(id->idIns()));
+ id->idSetEvexNdContext();
+ }
+ else
+ {
+ assert((instOptions & INS_OPTS_EVEX_nd_MASK) == 0);
+ }
+}
+
+//------------------------------------------------------------------------
+// SetEvexNfIfNeeded: set EVEX.NF (no flags) form if needed.
+//
+// Arguments:
+// id - instruction descriptor
+// instOptions - emit options
+//
+void SetEvexNfIfNeeded(instrDesc* id, insOpts instOptions)
+{
+ if ((instOptions & INS_OPTS_EVEX_nf_MASK) != 0)
+ {
+ assert(UsePromotedEVEXEncoding());
+ assert(IsApxNFEncodableInstruction(id->idIns()));
+ id->idSetEvexNfContext();
+ }
+ else
+ {
+ assert((instOptions & INS_OPTS_EVEX_nf_MASK) == 0);
+ }
+}
+
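As a usage sketch (not part of this diff), a codegen caller would request the promoted forms through the `insOpts` plumbing threaded below; the register choices and the specific emit overloads here are illustrative assumptions:

```cpp
// Hypothetical call sites: ask for the APX NDD form of an add so both
// sources survive, and for the NF form of a shift so EFLAGS are preserved.
emit->emitIns_R_R_R(INS_add, EA_4BYTE, REG_R10, REG_R11, REG_R12, INS_OPTS_EVEX_nd);
// roughly: add r10d, r11d, r12d    (r11d and r12d are left intact)
emit->emitIns_R_I(INS_shl_N, EA_4BYTE, REG_R10, 5, INS_OPTS_EVEX_nf);
// roughly: {nf} shl r10d, 5        (flags untouched)
```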
//------------------------------------------------------------------------
// AddSimdPrefixIfNeeded: Add the correct SIMD prefix.
// Check if the prefix already exists before adding.
@@ -753,7 +813,7 @@ void emitIns_Data16();
void emitIns_I(instruction ins, emitAttr attr, cnsval_ssize_t val);
-void emitIns_R(instruction ins, emitAttr attr, regNumber reg);
+void emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts instOptions = INS_OPTS_NONE);
void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs);
@@ -762,7 +822,9 @@ void emitIns_A(instruction ins, emitAttr attr, GenTreeIndir* indir);
void emitIns_R_I(instruction ins,
emitAttr attr,
regNumber reg,
- ssize_t val DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
+ ssize_t val,
+ insOpts instOptions = INS_OPTS_NONE DEBUGARG(size_t targetHandle = 0)
+ DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
void emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip);
@@ -771,7 +833,7 @@ void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2,
void emitIns_R_R_I(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival, insOpts instOptions = INS_OPTS_NONE);
-void emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs);
+void emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs, insOpts instOptions = INS_OPTS_NONE);
void emitIns_AR_R_R(instruction ins,
emitAttr attr,
@@ -942,7 +1004,8 @@ void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg);
-void emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int offs);
+void emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int offs, insOpts instOptions = INS_OPTS_NONE);
void emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp);
@@ -953,7 +1016,12 @@ void emitIns_R_AI(instruction ins,
regNumber ireg,
ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY));
-void emitIns_AR_R(instruction ins, emitAttr attr, regNumber reg, regNumber base, cnsval_ssize_t disp);
+void emitIns_AR_R(instruction ins,
+ emitAttr attr,
+ regNumber reg,
+ regNumber base,
+ cnsval_ssize_t disp,
+ insOpts instOptions = INS_OPTS_NONE);
void emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp);
@@ -974,7 +1042,8 @@ void emitIns_ARX_R(instruction ins,
regNumber base,
regNumber index,
unsigned scale,
- cnsval_ssize_t disp);
+ cnsval_ssize_t disp,
+ insOpts instOptions = INS_OPTS_NONE);
void emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp);
@@ -1122,6 +1191,13 @@ void emitIns_SIMD_R_R_R_S_I(instruction ins,
insOpts instOptions);
#endif // FEATURE_HW_INTRINSICS
+void emitIns_BASE_R_R(instruction ins, emitAttr attr, regNumber op1Reg, regNumber op2Reg);
+
+void emitIns_BASE_R_R_I(instruction ins, emitAttr attr, regNumber op1Reg, regNumber op2Reg, int ival);
+
+regNumber emitIns_BASE_R_R_RM(
+ instruction ins, emitAttr attr, regNumber targetReg, GenTree* treeNode, GenTree* regOp, GenTree* rmOp);
+
enum EmitCallType
{
EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method (call addr with RIP-relative encoding)
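The new `emitIns_BASE_R_R*` helpers give codegen a single entry point that can choose between the legacy two-address encoding and the APX NDD three-operand encoding; the returned register tells the caller where the result actually landed. A hedged sketch of the intended call shape (all names illustrative):

```cpp
// Sketch: lower "dst = op1 + op2" through the BASE helper. With NDD enabled
// this can become one three-operand add; otherwise it falls back to mov+add.
regNumber resReg = emit->emitIns_BASE_R_R_RM(INS_add, EA_8BYTE, targetReg, node, op1, op2);
if (resReg != targetReg)
{
    // The helper may report a different result register; move only if needed.
    emit->emitIns_Mov(INS_mov, EA_8BYTE, targetReg, resReg, /* canSkip */ true);
}
```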
diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp
index d9710994cf152b..940077dc6d2d62 100644
--- a/src/coreclr/jit/fgehopt.cpp
+++ b/src/coreclr/jit/fgehopt.cpp
@@ -2761,14 +2761,20 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info,
// this is cheaper than any other insertion point, as no existing regions get renumbered.
//
unsigned insertBeforeIndex = enclosingTryIndex;
- if (insertBeforeIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ if ((enclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) && (enclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX))
{
- JITDUMP("Cloned EH clauses will go at the end of the EH table\n");
+ JITDUMP("No enclosing EH region; cloned EH clauses will go at the end of the EH table\n");
insertBeforeIndex = compHndBBtabCount;
}
+ else if ((enclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) || (enclosingHndIndex < enclosingTryIndex))
+ {
+ JITDUMP("Cloned EH clauses will go before enclosing handler region EH#%02u\n", enclosingHndIndex);
+ insertBeforeIndex = enclosingHndIndex;
+ }
else
{
- JITDUMP("Cloned EH clauses will go before enclosing region EH#%02u\n", enclosingTryIndex);
+ JITDUMP("Cloned EH clauses will go before enclosing try region EH#%02u\n", enclosingTryIndex);
+ assert(insertBeforeIndex == enclosingTryIndex);
}
// Once we call fgTryAddEHTableEntries with deferCloning = false,
@@ -2989,7 +2995,7 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info,
const unsigned originalTryIndex = block->getTryIndex();
unsigned cloneTryIndex = originalTryIndex;
- if (originalTryIndex <= outermostTryIndex)
+ if (originalTryIndex < enclosingTryIndex)
{
cloneTryIndex += indexShift;
}
@@ -3003,11 +3009,15 @@ BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info,
if (block->hasHndIndex())
{
const unsigned originalHndIndex = block->getHndIndex();
+ unsigned cloneHndIndex = originalHndIndex;
+
+ if (originalHndIndex < enclosingHndIndex)
+ {
+ cloneHndIndex += indexShift;
+ }
- // if (originalHndIndex ==
- const unsigned cloneHndIndex = originalHndIndex + indexShift;
- EHblkDsc* const originalEbd = ehGetDsc(originalHndIndex);
- EHblkDsc* const clonedEbd = ehGetDsc(cloneHndIndex);
+ EHblkDsc* const originalEbd = ehGetDsc(originalHndIndex);
+ EHblkDsc* const clonedEbd = ehGetDsc(cloneHndIndex);
newBlock->setHndIndex(cloneHndIndex);
updateBlockReferences(cloneHndIndex);
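The corrected insertion-point choice above can be restated standalone (a sketch; `NONE` stands in for `EHblkDsc::NO_ENCLOSING_INDEX`, and the EH table is ordered inner-to-outer, so the cloned clauses must precede whichever enclosing region appears first):

```cpp
unsigned ChooseInsertBeforeIndex(unsigned enclosingTry, unsigned enclosingHnd, unsigned tableCount)
{
    const unsigned NONE = ~0u; // stand-in for EHblkDsc::NO_ENCLOSING_INDEX

    if ((enclosingTry == NONE) && (enclosingHnd == NONE))
    {
        return tableCount; // no enclosing region: append at the end of the table
    }
    if ((enclosingTry == NONE) || (enclosingHnd < enclosingTry))
    {
        return enclosingHnd; // enclosing handler region comes first in the table
    }
    return enclosingTry; // enclosing try region comes first in the table
}
```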
diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp
index 6187274c70332f..2c0ca450bc8243 100644
--- a/src/coreclr/jit/fgopt.cpp
+++ b/src/coreclr/jit/fgopt.cpp
@@ -2213,27 +2213,6 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock*
// At this point we know target is BBJ_COND.
assert(target->KindIs(BBJ_COND));
- // Bail out if OSR, as we can have unusual flow into loops. If one
- // of target's successors is also a backedge target, this optimization
- // may mess up loop recognition by creating too many non-loop preds.
- //
- if (opts.IsOSR())
- {
- if (target->GetFalseTarget()->HasFlag(BBF_BACKWARD_JUMP_TARGET))
- {
- JITDUMP("Deferring: " FMT_BB " --> " FMT_BB "; latter looks like loop top\n", target->bbNum,
- target->GetFalseTarget()->bbNum);
- return false;
- }
-
- if (target->GetTrueTarget()->HasFlag(BBF_BACKWARD_JUMP_TARGET))
- {
- JITDUMP("Deferring: " FMT_BB " --> " FMT_BB "; latter looks like loop top\n", target->bbNum,
- target->GetTrueTarget()->bbNum);
- return false;
- }
- }
-
// See if this block assigns constant or other interesting tree to that same local.
//
if (!fgBlockEndFavorsTailDuplication(block, lclNum))
@@ -4952,7 +4931,6 @@ Compiler::ThreeOptLayout::ThreeOptLayout(Compiler* comp)
, blockOrder(nullptr)
, tempOrder(nullptr)
, numCandidateBlocks(0)
- , currEHRegion(0)
{
}
@@ -5146,7 +5124,7 @@ void Compiler::ThreeOptLayout::ConsiderEdge(FlowEdge* edge)
BasicBlock* const dstBlk = edge->getDestinationBlock();
// Ignore cross-region branches
- if ((srcBlk->bbTryIndex != currEHRegion) || (dstBlk->bbTryIndex != currEHRegion))
+ if (!BasicBlock::sameTryRegion(srcBlk, dstBlk))
{
return;
}
@@ -5245,8 +5223,7 @@ void Compiler::ThreeOptLayout::AddNonFallthroughPreds(unsigned blockPos)
}
//-----------------------------------------------------------------------------
-// Compiler::ThreeOptLayout::Run: Runs 3-opt for each contiguous region of the block list
-// we're interested in reordering.
+// Compiler::ThreeOptLayout::Run: Runs 3-opt on the candidate span of hot blocks.
// We skip reordering handler regions for now, as these are assumed to be cold.
//
void Compiler::ThreeOptLayout::Run()
@@ -5292,41 +5269,9 @@ void Compiler::ThreeOptLayout::Run()
// Repurpose 'bbPostorderNum' for the block's ordinal
block->bbPostorderNum = numCandidateBlocks++;
-
- // While walking the span of blocks to reorder,
- // remember where each try region ends within this span.
- // We'll use this information to run 3-opt per region.
- EHblkDsc* const HBtab = compiler->ehGetBlockTryDsc(block);
- if (HBtab != nullptr)
- {
- HBtab->ebdTryLast = block;
- }
- }
-
- // Reorder try regions first
- bool modified = false;
- for (EHblkDsc* const HBtab : EHClauses(compiler))
- {
- // If multiple region indices map to the same region,
- // make sure we reorder its blocks only once
- BasicBlock* const tryBeg = HBtab->ebdTryBeg;
- if (tryBeg->getTryIndex() != currEHRegion++)
- {
- continue;
- }
-
- // Only reorder try regions within the candidate span of blocks
- if ((tryBeg->bbPostorderNum < numCandidateBlocks) && (blockOrder[tryBeg->bbPostorderNum] == tryBeg))
- {
- JITDUMP("Running 3-opt for try region #%d\n", (currEHRegion - 1));
- modified |= RunThreeOptPass(tryBeg, HBtab->ebdTryLast);
- }
}
- // Finally, reorder the main method body
- currEHRegion = 0;
- JITDUMP("Running 3-opt for main method body\n");
- modified |= RunThreeOptPass(compiler->fgFirstBB, blockOrder[numCandidateBlocks - 1]);
+ const bool modified = RunThreeOptPass();
if (modified)
{
@@ -5335,14 +5280,25 @@ void Compiler::ThreeOptLayout::Run()
BasicBlock* const block = blockOrder[i - 1];
BasicBlock* const next = blockOrder[i];
+ if (block->NextIs(next))
+ {
+ continue;
+ }
+
// Only reorder within EH regions to maintain contiguity.
- // TODO: Allow moving blocks in different regions when 'next' is the region entry.
- // This would allow us to move entire regions up/down because of the contiguity requirement.
- if (!block->NextIs(next) && BasicBlock::sameEHRegion(block, next))
+ if (!BasicBlock::sameEHRegion(block, next))
+ {
+ continue;
+ }
+
+ // Don't move the entry of an EH region.
+ if (compiler->bbIsTryBeg(next) || compiler->bbIsHandlerBeg(next))
{
- compiler->fgUnlinkBlock(next);
- compiler->fgInsertBBafter(block, next);
+ continue;
}
+
+ compiler->fgUnlinkBlock(next);
+ compiler->fgInsertBBafter(block, next);
}
}
}
@@ -5487,12 +5443,6 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned
continue;
}
- // Don't consider any cut points that would disturb other EH regions
- if (!BasicBlock::sameEHRegion(s2Block, s3Block))
- {
- continue;
- }
-
// Compute the cost delta of this partition
const weight_t currCost = currCostBase + GetCost(s3BlockPrev, s3Block);
const weight_t newCost =
@@ -5550,22 +5500,15 @@ bool Compiler::ThreeOptLayout::RunGreedyThreeOptPass(unsigned startPos, unsigned
}
//-----------------------------------------------------------------------------
-// Compiler::ThreeOptLayout::RunThreeOptPass: Runs 3-opt for the given block range.
-//
-// Parameters:
-// startBlock - The first block of the range to reorder
-// endBlock - The last block (inclusive) of the range to reorder
+// Compiler::ThreeOptLayout::RunThreeOptPass: Runs 3-opt on the candidate span of blocks.
//
// Returns:
// True if we reordered anything, false otherwise
//
-bool Compiler::ThreeOptLayout::RunThreeOptPass(BasicBlock* startBlock, BasicBlock* endBlock)
+bool Compiler::ThreeOptLayout::RunThreeOptPass()
{
- assert(startBlock != nullptr);
- assert(endBlock != nullptr);
-
- const unsigned startPos = startBlock->bbPostorderNum;
- const unsigned endPos = endBlock->bbPostorderNum;
+ const unsigned startPos = 0;
+ const unsigned endPos = numCandidateBlocks - 1;
const unsigned numBlocks = (endPos - startPos + 1);
assert(startPos <= endPos);
diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp
index 2a21e61f0d3a09..63634dc2edeb54 100644
--- a/src/coreclr/jit/fgprofile.cpp
+++ b/src/coreclr/jit/fgprofile.cpp
@@ -4210,10 +4210,7 @@ bool Compiler::fgIncorporateEdgeCounts()
//
PhaseStatus Compiler::fgComputeBlockWeights()
{
- const bool usingProfileWeights = fgIsUsingProfileWeights();
- bool madeChanges = false;
- fgModified = false;
- fgCalledCount = BB_UNITY_WEIGHT;
+ fgModified = false;
#if DEBUG
if (verbose)
@@ -4223,40 +4220,38 @@ PhaseStatus Compiler::fgComputeBlockWeights()
}
#endif // DEBUG
- weight_t returnWeight = BB_UNITY_WEIGHT;
-
- madeChanges |= fgComputeMissingBlockWeights(&returnWeight);
-
- if (usingProfileWeights)
+ if (fgIsUsingProfileWeights())
{
- madeChanges |= fgComputeCalledCount(returnWeight);
- }
- else
- {
- JITDUMP(" -- no profile data, so using default called count\n");
+ // Compute fgCalledCount by subtracting any non-entry flow into fgFirstBB from its weight
+ fgCalledCount = fgFirstBB->bbWeight;
+ for (FlowEdge* const predEdge : fgFirstBB->PredEdges())
+ {
+ fgCalledCount = max(BB_ZERO_WEIGHT, fgCalledCount - predEdge->getLikelyWeight());
+ }
+
+ JITDUMP("We are using the profile weights and fgCalledCount is " FMT_WT "\n", fgCalledCount);
+ return PhaseStatus::MODIFIED_NOTHING;
}
- return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
+ JITDUMP(" -- no profile data, so using default called count\n");
+ fgCalledCount = BB_UNITY_WEIGHT;
+ return fgComputeMissingBlockWeights() ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}
//-------------------------------------------------------------
// fgComputeMissingBlockWeights: determine weights for blocks
// that were not profiled and do not yet have weights.
//
-// Arguments
-// returnWeight [out] - sum of weights for all return and throw blocks
-//
// Returns:
// true if any changes made
//
-bool Compiler::fgComputeMissingBlockWeights(weight_t* returnWeight)
+bool Compiler::fgComputeMissingBlockWeights()
{
BasicBlock* bSrc;
BasicBlock* bDst;
unsigned iterations = 0;
bool changed;
bool modified = false;
- weight_t weight;
// If we have any blocks that did not have profile derived weight
// we will try to fix their weight up here
@@ -4265,7 +4260,6 @@ bool Compiler::fgComputeMissingBlockWeights(weight_t* returnWeight)
do // while (changed)
{
changed = false;
- weight = 0;
iterations++;
for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->Next())
@@ -4376,14 +4370,6 @@ bool Compiler::fgComputeMissingBlockWeights(weight_t* returnWeight)
bDst->bbSetRunRarely();
}
}
-
- // Sum up the weights of all of the return blocks and throw blocks
- // This is used when we have a back-edge into block 1
- //
- if (bDst->hasProfileWeight() && bDst->KindIs(BBJ_RETURN, BBJ_THROW))
- {
- weight += bDst->bbWeight;
- }
}
}
// Generally when we synthesize profile estimates we do it in a way where this algorithm will converge
@@ -4400,84 +4386,9 @@ bool Compiler::fgComputeMissingBlockWeights(weight_t* returnWeight)
}
#endif
- *returnWeight = weight;
-
return modified;
}
-//-------------------------------------------------------------
-// fgComputeCalledCount: when profile information is in use,
-// compute fgCalledCount
-//
-// Argument:
-// returnWeight - sum of weights for all return and throw blocks
-//
-// Returns:
-// true if any changes were made
-//
-bool Compiler::fgComputeCalledCount(weight_t returnWeight)
-{
- // When we are not using profile data we have already setup fgCalledCount
- // only set it here if we are using profile data
- assert(fgIsUsingProfileWeights());
- bool madeChanges = false;
-
- BasicBlock* firstILBlock = fgFirstBB; // The first block for IL code (i.e. for the IL code at offset 0)
-
- // OSR methods can have complex entry flow, and so
- // for OSR we ensure fgFirstBB has plausible profile data.
- //
- if (!opts.IsOSR())
- {
- // Skip past any/all BBF_INTERNAL blocks that may have been added before the first real IL block.
- //
- while (firstILBlock->HasFlag(BBF_INTERNAL))
- {
- firstILBlock = firstILBlock->Next();
- }
- }
-
- // The 'firstILBlock' is now expected to have a profile-derived weight
- assert(firstILBlock->hasProfileWeight());
-
- // If the first block only has one ref then we use its weight for fgCalledCount.
- // Otherwise we have backedges into the first block, so instead we use the sum
- // of the return block weights for fgCalledCount.
- //
- // If the profile data has a 0 for the returnWeight
- // (i.e. the function never returns because it always throws)
- // then just use the first block weight rather than 0.
- //
- if ((firstILBlock->countOfInEdges() == 1) || (returnWeight == BB_ZERO_WEIGHT))
- {
- fgCalledCount = firstILBlock->bbWeight;
- }
- else
- {
- fgCalledCount = returnWeight;
- }
-
- // If we allocated a scratch block as the first BB then we need
- // to set its profile-derived weight to be fgCalledCount
- if (fgFirstBB->HasFlag(BBF_INTERNAL))
- {
- fgFirstBB->setBBProfileWeight(fgCalledCount);
- madeChanges = true;
- JITDUMP("fgComputeCalledCount: Modified method entry weight. Data %s inconsistent.\n",
- fgPgoConsistent ? "is now" : "was already");
- fgPgoConsistent = false;
- }
-
-#if DEBUG
- if (verbose)
- {
- printf("We are using the Profile Weights and fgCalledCount is " FMT_WT "\n", fgCalledCount);
- }
-#endif
-
- return madeChanges;
-}
-
//------------------------------------------------------------------------
// fgProfileWeightsEqual: check if two profile weights are equal
// (or nearly so)
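A worked example of the simplified computation (weights assumed for illustration): if `fgFirstBB` carries a profile weight of 100 and a single loop back-edge into it has a likely weight of 60, the method was actually called 40 times; the `max` with `BB_ZERO_WEIGHT` only guards against mildly inconsistent counts driving the result negative.

```cpp
//   fgFirstBB->bbWeight         = 100  (entry weight, including back-edge flow)
//   backEdge->getLikelyWeight() = 60   (loop flow re-entering fgFirstBB)
//   fgCalledCount = max(BB_ZERO_WEIGHT, 100 - 60) = 40
```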
diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
index 8407fd469eff6c..1d1eb069819e75 100644
--- a/src/coreclr/jit/helperexpansion.cpp
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -744,7 +744,7 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
fastPathBb->inheritWeight(prevBb);
// fallback will just execute first time
- fallbackBb->bbSetRunRarely();
+ fallbackBb->inheritWeightPercentage(tlsRootNullCondBB, 0);
fgRedirectTargetEdge(prevBb, tlsRootNullCondBB);
@@ -1180,7 +1180,7 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement*
fastPathBb->inheritWeight(prevBb);
// fallback will just execute first time
- fallbackBb->bbSetRunRarely();
+ fallbackBb->inheritWeightPercentage(prevBb, 0);
// All blocks are expected to be in the same EH region
assert(BasicBlock::sameEHRegion(prevBb, block));
@@ -1545,7 +1545,7 @@ bool Compiler::fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, G
block->inheritWeight(prevBb);
isInitedBb->inheritWeight(prevBb);
- helperCallBb->bbSetRunRarely();
+ helperCallBb->inheritWeightPercentage(isInitedBb, 0);
// All blocks are expected to be in the same EH region
assert(BasicBlock::sameEHRegion(prevBb, block));
@@ -1847,6 +1847,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock,
//
// Redirect prevBb to lengthCheckBb
fgRedirectTargetEdge(prevBb, lengthCheckBb);
+ lengthCheckBb->inheritWeight(prevBb);
assert(prevBb->JumpsToNext());
{
@@ -1859,6 +1860,11 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock,
// review: we assume length check always succeeds??
trueEdge->setLikelihood(1.0);
falseEdge->setLikelihood(0.0);
+
+ if (lengthCheckBb->hasProfileWeight())
+ {
+ fastpathBb->setBBProfileWeight(falseEdge->getLikelyWeight());
+ }
}
{
@@ -1869,10 +1875,8 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock,
}
//
- // Re-distribute weights
+ // Ensure all flow out of prevBb converges into block
//
- lengthCheckBb->inheritWeight(prevBb);
- fastpathBb->inheritWeight(lengthCheckBb);
block->inheritWeight(prevBb);
// All blocks are expected to be in the same EH region
@@ -2551,11 +2555,18 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
trueEdge->setLikelihood(nullcheckTrueLikelihood);
}
+ // Set nullcheckBb's weight here, so we can propagate it to its successors below
+ nullcheckBb->inheritWeight(firstBb);
+
if (typeCheckNotNeeded)
{
FlowEdge* const falseEdge = fgAddRefPred(fallbackBb, nullcheckBb);
nullcheckBb->SetFalseEdge(falseEdge);
falseEdge->setLikelihood(nullcheckFalseLikelihood);
+ fallbackBb->inheritWeight(nullcheckBb);
+ fallbackBb->scaleBBWeight(nullcheckFalseLikelihood);
+ lastBb->inheritWeight(nullcheckBb);
+ lastBb->scaleBBWeight(nullcheckTrueLikelihood);
typeCheckSucceedBb = nullptr;
}
@@ -2631,7 +2642,6 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
// The same goes for inherited weights -- the block where we test for B will have
// the weight of A times the likelihood that A's test fails, etc.
//
- nullcheckBb->inheritWeight(firstBb);
weight_t sumOfPreviousLikelihood = 0;
for (int candidateId = 0; candidateId < numOfCandidates; candidateId++)
{
@@ -2666,28 +2676,22 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
sumOfPreviousLikelihood += likelihood;
}
- if (fallbackBb->KindIs(BBJ_THROW))
- {
- fallbackBb->bbSetRunRarely();
- }
- else
+ fallbackBb->inheritWeight(lastTypeCheckBb);
+ fallbackBb->scaleBBWeight(lastTypeCheckBb->GetFalseEdge()->getLikelihood());
+
+ if (fallbackBb->KindIs(BBJ_ALWAYS))
{
- assert(fallbackBb->KindIs(BBJ_ALWAYS));
FlowEdge* const newEdge = fgAddRefPred(lastBb, fallbackBb);
fallbackBb->SetTargetEdge(newEdge);
- fallbackBb->inheritWeight(lastTypeCheckBb);
- weight_t lastTypeCheckFailedLikelihood = lastTypeCheckBb->GetFalseEdge()->getLikelihood();
- fallbackBb->scaleBBWeight(lastTypeCheckFailedLikelihood);
}
if (!typeCheckNotNeeded)
{
typeCheckSucceedBb->inheritWeight(typeChecksBbs[0]);
typeCheckSucceedBb->scaleBBWeight(sumOfPreviousLikelihood);
+ lastBb->inheritWeight(firstBb);
}
- lastBb->inheritWeight(firstBb);
-
//
// Validate EH regions
//
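The substitution repeated throughout this file (`bbSetRunRarely` to `inheritWeightPercentage(pred, 0)`) keeps a fallback's zero weight tied to its predecessor, so the profile-derived property propagates instead of being dropped. In isolation:

```cpp
// Old: unconditionally mark the block rarely run, severing any link to profile data.
fallbackBb->bbSetRunRarely();

// New: derive 0% of the predecessor's weight; the block still ends up with
// weight zero, but it stays profile-derived whenever the predecessor is.
fallbackBb->inheritWeightPercentage(prevBb, 0);
```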
diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h
index 4f00ca62627966..3ff3708785cd4b 100644
--- a/src/coreclr/jit/instr.h
+++ b/src/coreclr/jit/instr.h
@@ -221,6 +221,12 @@ enum insFlags : uint64_t
// APX: REX2 prefix:
Encoding_REX2 = 1ULL << 44,
+ // APX: EVEX.ND:
+ INS_Flags_Has_NDD = 1ULL << 45,
+
+ // APX: EVEX.NF:
+ INS_Flags_Has_NF = 1ULL << 46,
+
// TODO-Cleanup: Remove this flag and its usage from TARGET_XARCH
INS_FLAGS_DONT_CARE = 0x00ULL,
};
@@ -259,6 +265,19 @@ enum insOpts: unsigned
INS_OPTS_EVEX_z_MASK = 0x20, // mask for EVEX.z related features
INS_OPTS_EVEX_em_zero = 1 << 5, // Embedded mask merges with zero
+
+ // One-bit: 0b0100_0000
+ INS_OPTS_EVEX_nd_MASK = 0x40, // mask for APX-EVEX.nd related features
+
+ INS_OPTS_EVEX_nd = 1 << 6, // NDD form for legacy instructions
+
+ // One-bit: 0b1000_0000
+ INS_OPTS_EVEX_nf_MASK = 0x80, // mask for APX-EVEX.nf related features
+
+ INS_OPTS_EVEX_nf = 1 << 7, // NF (no flags) form for legacy instructions
+
+ INS_OPTS_EVEX_NoApxPromotion = 1 << 8, // Do not promote to APX-EVEX
+
};
#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
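Since each new APX option occupies a single bit, its `_MASK` constant coincides with the flag itself, unlike the multi-bit `b`/`z` masks earlier in the enum; a quick sanity sketch (the asserts are illustrative, not part of the change):

```cpp
static_assert(INS_OPTS_EVEX_nd_MASK == INS_OPTS_EVEX_nd, "nd is a single-bit feature");
static_assert(INS_OPTS_EVEX_nf_MASK == INS_OPTS_EVEX_nf, "nf is a single-bit feature");
```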
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 24be0ef3527b6a..f4c5df821190af 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -58,26 +58,26 @@ INST5(pop, "pop", IUM_WR, 0x00008E, BAD_CODE,
INST5(push_hide, "push", IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_TT_NONE, Encoding_REX2)
INST5(pop_hide, "pop", IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_TT_NONE, Encoding_REX2)
-INST5(inc, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | INS_FLAGS_Has_Wbit |Encoding_REX2)
-INST5(inc_l, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Encoding_REX2)
-INST5(dec, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST5(dec_l, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Encoding_REX2)
+INST5(inc, "inc", IUM_RW, 0x0000FE, BAD_CODE, 0x0000FE, BAD_CODE, 0x000040, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST5(inc_l, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Encoding_REX2 | INS_Flags_Has_NF)
+INST5(dec, "dec", IUM_RW, 0x0008FE, BAD_CODE, 0x0008FE, BAD_CODE, 0x000048, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST5(dec_l, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Encoding_REX2 | INS_Flags_Has_NF)
// Multi-byte opcodes without modrm are represented in mixed endian fashion.
// See comment around quarter way through this file for more information.
INST5(bswap, "bswap", IUM_RW, 0x0F00C8, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C80F, INS_TT_NONE, Encoding_REX2)
// id nm um mr mi rm a4 tt flags
-INST4(add, "add", IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST4(or, "or", IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_TT_NONE, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2)
+INST4(add, "add", IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST4(or, "or", IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_TT_NONE, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
INST4(adc, "adc", IUM_RW, 0x000010, 0x001080, 0x000012, 0x000014, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | Reads_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2)
INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | Reads_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_TT_NONE, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2)
+INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_TT_NONE, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
// Does not affect the stack tracking in the emitter
INST4(sub_hide, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_TT_NONE, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2)
+INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_TT_NONE, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit | Encoding_REX2)
INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, INS_TT_NONE, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
INST4(mov, "mov", IUM_WR, 0x000088, 0x0000C6, 0x00008A, 0x0000B0, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2)
@@ -99,25 +99,25 @@ INST3(movsxd, "movsxd", IUM_WR, BAD_CODE, BAD_CODE,
#endif
INST3(movzx, "movzx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_TT_NONE, Reads_OF | Encoding_REX2)
-INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_TT_NONE, Reads_OF | Encoding_REX2)
-INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_TT_NONE, Reads_CF | Encoding_REX2)
-INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_TT_NONE, Reads_CF | Encoding_REX2)
-INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_TT_NONE, Reads_ZF | Encoding_REX2)
-INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_TT_NONE, Reads_ZF | Encoding_REX2)
-INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2)
-INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2)
-INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_TT_NONE, Reads_SF | Encoding_REX2)
-INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_TT_NONE, Reads_SF | Encoding_REX2)
-INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_TT_NONE, Reads_PF | Encoding_REX2)
-INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_TT_NONE, Reads_PF | Encoding_REX2)
-INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2)
-INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2)
-INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2)
-INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2)
+INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_TT_NONE, Reads_OF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_TT_NONE, Reads_OF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_TT_NONE, Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_TT_NONE, Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_TT_NONE, Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_TT_NONE, Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_TT_NONE, Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_TT_NONE, Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_TT_NONE, Reads_PF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_TT_NONE, Reads_PF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD)
+INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD)
INST3(xchg, "xchg", IUM_RW, 0x000086, BAD_CODE, 0x000086, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NDD | INS_Flags_Has_NF)
// id nm um mr mi rm tt flags
@@ -125,25 +125,25 @@ INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE,
// as 2-operand instructions with the target register being implicit
// implicit_reg = op1*op2_icon
#define INSTMUL INST3
-INSTMUL(imul_AX, "imul", IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_CX, "imul", IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_DX, "imul", IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_BX, "imul", IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_SP, "imul", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_BP, "imul", IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_SI, "imul", IUM_RD, BAD_CODE, 0x003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_DI, "imul", IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
+INSTMUL(imul_AX, "imul", IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_CX, "imul", IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_DX, "imul", IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_BX, "imul", IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_SP, "imul", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_BP, "imul", IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_SI, "imul", IUM_RD, BAD_CODE, 0x003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_DI, "imul", IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
#ifdef TARGET_AMD64
-INSTMUL(imul_08, "imul", IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_09, "imul", IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_10, "imul", IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_11, "imul", IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_12, "imul", IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_13, "imul", IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_14, "imul", IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
-INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit)
+INSTMUL(imul_08, "imul", IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_09, "imul", IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_10, "imul", IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_11, "imul", IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_12, "imul", IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_13, "imul", IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_14, "imul", IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
+INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NF)
#endif // TARGET_AMD64
@@ -593,11 +593,11 @@ INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BA
INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
// BMI1
-INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF) // Logical AND NOT
-INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF) // Bit Field Extract
-INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Extract Lowest Set Isolated Bit
-INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Resets_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Get Mask Up to Lowest Set Bit
-INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Reset Lowest Set Bit
+INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_Has_NF) // Logical AND NOT
+INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Resets_CF | INS_Flags_Has_NF) // Bit Field Extract
+INST3(blsi, "blsi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Extract Lowest Set Isolated Bit
+INST3(blsmsk, "blsmsk", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Resets_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Get Mask Up to Lowest Set Bit
+INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF3), INS_TT_NONE, REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Reset Lowest Set Bit
// BMI2
INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_TT_NONE, REX_WX | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF) // Zero High Bits Starting with Specified Bit Position
@@ -952,35 +952,43 @@ INST3(movbe, "movbe", IUM_WR, PCKMVB(0xF1), BAD_CODE,
// POPCNT
INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_TT_NONE, Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF | Encoding_REX2)
+#if defined(TARGET_AMD64)
+INST3(tzcnt_apx, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F4, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF) // Count the Number of Trailing Zero Bits
+INST3(lzcnt_apx, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x0000F5, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_Flags_Has_NF)
+INST3(popcnt_apx, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, 0x000088, INS_TT_NONE, Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF | INS_Flags_Has_NF)
+#endif // TARGET_AMD64
+
+INST3(neg, "neg", IUM_RW, 0x0018F6, BAD_CODE, 0x0018F6, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(not, "not", IUM_RW, 0x0010F6, BAD_CODE, 0x0010F6, INS_TT_NONE, INS_FLAGS_None | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD)
+
+INST3(rol, "rol", IUM_RW, 0x0000D2, BAD_CODE, 0x0000D2, INS_TT_NONE, Undefined_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(rol_1, "rol", IUM_RW, 0x0000D0, 0x0000D0, 0x0000D0, INS_TT_NONE, Writes_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(rol_N, "rol", IUM_RW, 0x0000C0, 0x0000C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(ror, "ror", IUM_RW, 0x0008D2, BAD_CODE, 0x0008D2, INS_TT_NONE, Undefined_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(ror_1, "ror", IUM_RW, 0x0008D0, 0x0008D0, 0x0008D0, INS_TT_NONE, Writes_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(ror_N, "ror", IUM_RW, 0x0008C0, 0x0008C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+
+INST3(rcl, "rcl", IUM_RW, 0x0010D2, BAD_CODE, 0x0010D2, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(rcl_1, "rcl", IUM_RW, 0x0010D0, 0x0010D0, 0x0010D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(rcr, "rcr", IUM_RW, 0x0018D2, BAD_CODE, 0x0018D2, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, 0x0018D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(rcr_N, "rcr", IUM_RW, 0x0018C0, 0x0018C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(shl, "shl", IUM_RW, 0x0020D2, BAD_CODE, 0x0020D2, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(shl_1, "shl", IUM_RW, 0x0020D0, 0x0020D0, 0x0020D0, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(shl_N, "shl", IUM_RW, 0x0020C0, 0x0020C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(shr, "shr", IUM_RW, 0x0028D2, BAD_CODE, 0x0028D2, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(shr_1, "shr", IUM_RW, 0x0028D0, 0x0028D0, 0x0028D0, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(shr_N, "shr", IUM_RW, 0x0028C0, 0x0028C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(sar, "sar", IUM_RW, 0x0038D2, BAD_CODE, 0x0038D2, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(sar_1, "sar", IUM_RW, 0x0038D0, 0x0038D0, 0x0038D0, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+INST3(sar_N, "sar", IUM_RW, 0x0038C0, 0x0038C0, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NDD | INS_Flags_Has_NF)
+
// id nm um mr mi flags
INST2(ret, "ret", IUM_RD, 0x0000C3, 0x0000C2, INS_TT_NONE, INS_FLAGS_None)
INST2(loop, "loop", IUM_RD, BAD_CODE, 0x0000E2, INS_TT_NONE, INS_FLAGS_None)
INST2(call, "call", IUM_RD, 0x0010FF, 0x0000E8, INS_TT_NONE, Encoding_REX2)
-INST2(rol, "rol", IUM_RW, 0x0000D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(rol_1, "rol", IUM_RW, 0x0000D0, 0x0000D0, INS_TT_NONE, Writes_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(rol_N, "rol", IUM_RW, 0x0000C0, 0x0000C0, INS_TT_NONE, Undefined_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(ror, "ror", IUM_RW, 0x0008D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(ror_1, "ror", IUM_RW, 0x0008D0, 0x0008D0, INS_TT_NONE, Writes_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(ror_N, "ror", IUM_RW, 0x0008C0, 0x0008C0, INS_TT_NONE, Undefined_OF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-
-INST2(rcl, "rcl", IUM_RW, 0x0010D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(rcl_1, "rcl", IUM_RW, 0x0010D0, 0x0010D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(rcr, "rcr", IUM_RW, 0x0018D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, INS_TT_NONE, Writes_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(rcr_N, "rcr", IUM_RW, 0x0018C0, 0x0018C0, INS_TT_NONE, Undefined_OF | Writes_CF | Reads_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(shl, "shl", IUM_RW, 0x0020D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(shl_1, "shl", IUM_RW, 0x0020D0, 0x0020D0, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(shl_N, "shl", IUM_RW, 0x0020C0, 0x0020C0, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(shr, "shr", IUM_RW, 0x0028D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(shr_1, "shr", IUM_RW, 0x0028D0, 0x0028D0, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(shr_N, "shr", IUM_RW, 0x0028C0, 0x0028C0, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(sar, "sar", IUM_RW, 0x0038D2, BAD_CODE, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(sar_1, "sar", IUM_RW, 0x0038D0, 0x0038D0, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST2(sar_N, "sar", IUM_RW, 0x0038C0, 0x0038C0, INS_TT_NONE, Undefined_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-
-
// id nm um mr flags
INST1(r_movsb, "rep movsb", IUM_RD, 0x00A4F3, INS_TT_NONE, Reads_DF | INS_FLAGS_Has_Wbit)
INST1(r_movsd, "rep movsd", IUM_RD, 0x00A5F3, INS_TT_NONE, Reads_DF | INS_FLAGS_Has_Wbit)
@@ -1012,15 +1020,12 @@ INST1(leave, "leave", IUM_RD, 0x0000C9,
INST1(serialize, "serialize", IUM_RD, 0x0fe801, INS_TT_NONE, INS_FLAGS_None)
-INST1(neg, "neg", IUM_RW, 0x0018F6, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST1(not, "not", IUM_RW, 0x0010F6, INS_TT_NONE, INS_FLAGS_None | INS_FLAGS_Has_Wbit | Encoding_REX2)
-
INST1(cwde, "cwde", IUM_RD, 0x000098, INS_TT_NONE, INS_FLAGS_None)
INST1(cdq, "cdq", IUM_RD, 0x000099, INS_TT_NONE, INS_FLAGS_None)
-INST1(idiv, "idiv", IUM_RD, 0x0038F6, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST1(imulEAX, "imul", IUM_RD, 0x0028F6, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Wbit)
-INST1(div, "div", IUM_RD, 0x0030F6, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
-INST1(mulEAX, "mul", IUM_RD, 0x0020F6, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2)
+INST1(idiv, "idiv", IUM_RD, 0x0038F6, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF)
+INST1(imulEAX, "imul", IUM_RD, 0x0028F6, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Wbit | INS_Flags_Has_NF)
+INST1(div, "div", IUM_RD, 0x0030F6, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF)
+INST1(mulEAX, "mul", IUM_RD, 0x0020F6, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF)
INST1(sahf, "sahf", IUM_RD, 0x00009E, INS_TT_NONE, Restore_SF_ZF_AF_PF_CF)
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 134d5ffc99ca00..ab8c6495027003 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -369,8 +369,9 @@ RELEASE_CONFIG_INTEGER(EnableMultiRegLocals, "EnableMultiRegLocals", 1)
RELEASE_CONFIG_INTEGER(JitNoInline, "JitNoInline", 0)
#if defined(DEBUG)
-CONFIG_INTEGER(JitStressRex2Encoding, "JitStressRex2Encoding", 0) // Enable rex2 encoding for legacy instructions.
-CONFIG_INTEGER(JitBypassAPXCheck, "JitBypassAPXCheck", 0) // Bypass APX CPUID check.
+CONFIG_INTEGER(JitStressRex2Encoding, "JitStressRex2Encoding", 0) // Enable rex2 encoding for compatible instructions.
+CONFIG_INTEGER(JitStressPromotedEvexEncoding, "JitStressPromotedEvexEncoding", 0) // Enable promoted EVEX encoding for
+ // compatible instructions.
#endif
// clang-format off
@@ -440,6 +441,7 @@ RELEASE_CONFIG_INTEGER(EnableArm64Sve, "EnableArm64Sve",
RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, "EnableEmbeddedBroadcast", 1) // Allows embedded broadcasts to be disabled
RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", 1) // Allows embedded masking to be disabled
+RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled
// clang-format on
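The new knob ships disabled by default. In the JIT it would be read through the generated `JitConfig` accessor; a plausible gating site looks like the following sketch (the pairing with the ISA check is an assumption here, not shown in this hunk):

```cpp
// Hypothetical gate: NDD emission requires both hardware/ISA support and the
// config switch to be turned on (DOTNET_EnableApxNDD=1).
const bool canUseApxNdd =
    compOpportunisticallyDependsOn(InstructionSet_APX) && (JitConfig.EnableApxNDD() != 0);
```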
diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp
index 60c830aad8d592..c833f2164fa0bc 100644
--- a/src/coreclr/jit/jiteh.cpp
+++ b/src/coreclr/jit/jiteh.cpp
@@ -2660,7 +2660,7 @@ bool Compiler::fgCreateFiltersForGenericExceptions()
filterBb->bbCodeOffs = handlerBb->bbCodeOffs;
filterBb->bbHndIndex = handlerBb->bbHndIndex;
filterBb->bbTryIndex = handlerBb->bbTryIndex;
- filterBb->bbSetRunRarely();
+ filterBb->inheritWeightPercentage(handlerBb, 0);
filterBb->SetFlags(BBF_INTERNAL | BBF_DONT_REMOVE);
handlerBb->bbCatchTyp = BBCT_FILTER_HANDLER;
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 724dd17082a4e3..3b022ca56c857b 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -511,6 +511,11 @@ GenTree* Lowering::LowerNode(GenTree* node)
}
#endif
break;
+ case GT_NOT:
+#ifdef TARGET_ARM64
+ ContainCheckNot(node->AsOp());
+#endif
+ break;
case GT_SELECT:
return LowerSelect(node->AsConditional());
@@ -1066,11 +1071,11 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
for (unsigned i = 0; i < jumpCnt - 1; ++i)
{
assert(currentBlock != nullptr);
- BasicBlock* const targetBlock = jumpTab[i]->getDestinationBlock();
// Remove the switch from the predecessor list of this case target's block.
// We'll add the proper new predecessor edge later.
- FlowEdge* const oldEdge = jumpTab[i];
+ FlowEdge* const oldEdge = jumpTab[i];
+ BasicBlock* const targetBlock = oldEdge->getDestinationBlock();
// Compute the likelihood that this test is successful.
// Divide by number of cases still sharing this edge (reduces likelihood)
@@ -1131,8 +1136,9 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
{
BasicBlock* const newBlock = comp->fgNewBBafter(BBJ_ALWAYS, currentBlock, true);
FlowEdge* const newEdge = comp->fgAddRefPred(newBlock, currentBlock);
- currentBlock = newBlock;
- currentBBRange = &LIR::AsRange(currentBlock);
+ newBlock->inheritWeight(currentBlock);
+ currentBlock = newBlock;
+ currentBBRange = &LIR::AsRange(currentBlock);
afterDefaultCondBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
}
@@ -1207,6 +1213,25 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
currentBlock->RemoveFlags(BBF_DONT_REMOVE);
comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block.
}
+
+ // Update flow into switch targets
+ if (afterDefaultCondBlock->hasProfileWeight())
+ {
+ bool profileInconsistent = false;
+ for (unsigned i = 0; i < jumpCnt - 1; i++)
+ {
+ BasicBlock* const targetBlock = jumpTab[i]->getDestinationBlock();
+ targetBlock->setBBProfileWeight(targetBlock->computeIncomingWeight());
+ profileInconsistent |= (targetBlock->NumSucc() > 0);
+ }
+
+ if (profileInconsistent)
+ {
+ JITDUMP("Switch lowering: Flow out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n",
+ afterDefaultCondBlock->bbNum, comp->fgPgoConsistent ? "is now" : "was already");
+ comp->fgPgoConsistent = false;
+ }
+ }
}
else
{
@@ -1260,11 +1285,28 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
JITDUMP("Zero weight switch block " FMT_BB ", distributing likelihoods equally per case\n",
afterDefaultCondBlock->bbNum);
// jumpCnt-1 here because we peeled the default after copying this value.
- weight_t const newLikelihood = 1.0 / (jumpCnt - 1);
+ weight_t const newLikelihood = 1.0 / (jumpCnt - 1);
+ bool profileInconsistent = false;
for (unsigned i = 0; i < successors.numDistinctSuccs; i++)
{
- FlowEdge* const edge = successors.nonDuplicates[i];
+ FlowEdge* const edge = successors.nonDuplicates[i];
+ weight_t const oldEdgeWeight = edge->getLikelyWeight();
edge->setLikelihood(newLikelihood * edge->getDupCount());
+ weight_t const newEdgeWeight = edge->getLikelyWeight();
+
+ if (afterDefaultCondBlock->hasProfileWeight())
+ {
+ BasicBlock* const targetBlock = edge->getDestinationBlock();
+ targetBlock->increaseBBProfileWeight(newEdgeWeight - oldEdgeWeight);
+ profileInconsistent |= (targetBlock->NumSucc() > 0);
+ }
+ }
+
+ if (profileInconsistent)
+ {
+ JITDUMP("Switch lowering: Flow out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n",
+ afterDefaultCondBlock->bbNum, comp->fgPgoConsistent ? "is now" : "was already");
+ comp->fgPgoConsistent = false;
}
}
else
@@ -1447,6 +1489,22 @@ bool Lowering::TryLowerSwitchToBitTest(FlowEdge* jumpTable[],
bbSwitch->SetCond(case1Edge, case0Edge);
+ //
+ // Update profile
+ //
+ if (bbSwitch->hasProfileWeight())
+ {
+ bbCase0->setBBProfileWeight(bbCase0->computeIncomingWeight());
+ bbCase1->setBBProfileWeight(bbCase1->computeIncomingWeight());
+
+ if ((bbCase0->NumSucc() > 0) || (bbCase1->NumSucc() > 0))
+ {
+ JITDUMP("TryLowerSwitchToBitTest: Flow out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n",
+ bbSwitch->bbNum, comp->fgPgoConsistent ? "is now" : "was already");
+ comp->fgPgoConsistent = false;
+ }
+ }
+
var_types bitTableType = (bitCount <= (genTypeSize(TYP_INT) * 8)) ? TYP_INT : TYP_LONG;
GenTree* bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType);
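
For reference, TryLowerSwitchToBitTest applies when every case of a dense switch routes to one of exactly two blocks; the jump table collapses into a bit mask tested with a single bit-test instruction, and the new code above then recomputes both target blocks' profile weights from their incoming edges. A hypothetical C# switch of that shape:

    static class SwitchShape
    {
        // All five cases share one target, the default shares the other, and
        // the case range fits in 32 bits, so a single bit-table test suffices.
        static bool IsVowel(char c)
        {
            switch (c)
            {
                case 'a': case 'e': case 'i': case 'o': case 'u':
                    return true;
                default:
                    return false;
            }
        }
    }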
diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index 659870c844cd73..7fa3aca9e98511 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -94,6 +94,7 @@ class Lowering final : public Phase
insCflags TruthifyingFlags(GenCondition cond);
void ContainCheckConditionalCompare(GenTreeCCMP* ccmp);
void ContainCheckNeg(GenTreeOp* neg);
+ void ContainCheckNot(GenTreeOp* notOp);
void TryLowerCnsIntCselToCinc(GenTreeOp* select, GenTree* cond);
void TryLowerCselToCSOp(GenTreeOp* select, GenTree* cond);
bool TryLowerAddSubToMulLongOp(GenTreeOp* op, GenTree** next);
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 471ec849686e92..852d912a133e78 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -304,7 +304,7 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN
}
}
- if (childNode->OperIs(GT_LSH, GT_RSH, GT_RSZ) && parentNode->OperIs(GT_AND_NOT))
+ if (childNode->OperIs(GT_LSH, GT_RSH, GT_RSZ) && parentNode->OperIs(GT_NOT, GT_AND_NOT))
{
return true;
}
@@ -3290,6 +3290,31 @@ void Lowering::ContainCheckNeg(GenTreeOp* neg)
}
}
+//------------------------------------------------------------------------
+// ContainCheckNot : determine whether the source of a not should be contained.
+//
+// Arguments:
+// notOp - pointer to the node
+//
+void Lowering::ContainCheckNot(GenTreeOp* notOp)
+{
+ if (notOp->isContained())
+ return;
+
+ if (!varTypeIsIntegral(notOp))
+ return;
+
+ if ((notOp->gtFlags & GTF_SET_FLAGS))
+ return;
+
+ GenTree* childNode = notOp->gtGetOp1();
+ if (comp->opts.OptimizationEnabled() && childNode->OperIs(GT_LSH, GT_RSH, GT_RSZ) &&
+ IsContainableUnaryOrBinaryOp(notOp, childNode))
+ {
+ MakeSrcContained(notOp, childNode);
+ }
+}
+
//----------------------------------------------------------------------------------------------
// TryLowerCselToCSOp: Try converting SELECT/SELECTCC to SELECT_?/SELECT_?CC. Conversion is possible only if
// one of the operands of the select node is one of GT_NEG, GT_NOT or GT_ADD.
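
The new containment exists because ARM64's MVN accepts a shifted register operand, so a bitwise-not of a shift can fold into one instruction once the shift node is contained under GT_NOT. A small illustration (the expected codegen is noted as a comment, not verified output):

    static class NotShift
    {
        // Before: lsl w0, w0, #3 ; mvn w0, w0
        // With the shift contained by GT_NOT: mvn w0, w0, lsl #3
        static int NotOfShift(int x) => ~(x << 3);
    }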
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 31785e2e052e49..a787fece19fb6d 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -13533,7 +13533,7 @@ PhaseStatus Compiler::fgMorphBlocks()
if (!fgProfileWeightsConsistent(incomingWeight, fgEntryBB->bbWeight))
{
JITDUMP("OSR: Original method entry " FMT_BB " has inconsistent weight. Data %s inconsistent.\n",
- fgPgoConsistent ? "is now" : "was already");
+ fgEntryBB->bbNum, fgPgoConsistent ? "is now" : "was already");
fgPgoConsistent = false;
}
}
diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp
index f427e3fc068d07..4642af455804a6 100644
--- a/src/coreclr/jit/optimizer.cpp
+++ b/src/coreclr/jit/optimizer.cpp
@@ -2240,6 +2240,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
//
bNewCond->inheritWeight(block);
+ const weight_t totalWeight = bTest->bbWeight;
+
if (haveProfileWeights)
{
bTest->decreaseBBProfileWeight(block->bbWeight);
@@ -2300,6 +2302,15 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
}
}
+ const weight_t loopWeight = bTest->bbWeight;
+ const weight_t nonLoopWeight = bNewCond->bbWeight;
+ if (haveProfileWeights && !fgProfileWeightsConsistent(totalWeight, loopWeight + nonLoopWeight))
+ {
+ JITDUMP("Redirecting flow from " FMT_BB " to " FMT_BB " introduced inconsistency. Data %s inconsistent.\n",
+ bTest->bbNum, bNewCond->bbNum, fgPgoConsistent ? "is now" : "was already");
+ fgPgoConsistent = false;
+ }
+
#ifdef DEBUG
if (verbose)
{
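
To make the new check concrete: before inversion, bTest carries all of the flow that the duplicated condition now splits, so bTest's remaining weight plus bNewCond's weight should reproduce the original total. A sketch of that invariant with made-up numbers (fgProfileWeightsConsistent compares with a tolerance rather than exact equality; the tolerance below is illustrative):

    using System;

    static class WeightSplit
    {
        // Hypothetical values: 100 units entered the old test block; 80 stay
        // on the loop path and 20 were redirected to the zero-trip test.
        static bool Consistent(double total = 100.0, double loop = 80.0, double nonLoop = 20.0)
            => Math.Abs(total - (loop + nonLoop)) <= 0.01 * total;
    }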
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Dispensers/DispenserThatReusesAsLongAsKeyIsAlive.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Dispensers/DispenserThatReusesAsLongAsKeyIsAlive.cs
index d0879b3cd0d9c2..18ba9d7880978d 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Dispensers/DispenserThatReusesAsLongAsKeyIsAlive.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Reflection/Runtime/Dispensers/DispenserThatReusesAsLongAsKeyIsAlive.cs
@@ -14,23 +14,16 @@ internal sealed class DispenserThatReusesAsLongAsKeyIsAlive<K, V> : Dispenser<K, V>
public DispenserThatReusesAsLongAsKeyIsAlive(Func<K, V> factory)
{
- _createValueCallback = CreateValue;
_conditionalWeakTable = new ConditionalWeakTable<K, V>();
_factory = factory;
}
public sealed override V GetOrAdd(K key)
{
- return _conditionalWeakTable.GetValue(key, _createValueCallback);
- }
-
- private V CreateValue(K key)
- {
- return _factory(key);
+ return _conditionalWeakTable.GetOrAdd(key, _factory);
}
private readonly Func<K, V> _factory;
private readonly ConditionalWeakTable<K, V> _conditionalWeakTable;
- private readonly ConditionalWeakTable<K, V>.CreateValueCallback _createValueCallback;
}
}
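
The rewrite above is the first of several in this diff that migrate from ConditionalWeakTable<K, V>.GetValue, which requires a CreateValueCallback (so callers cached one in a field to avoid repeated delegate allocations), to GetOrAdd, which takes a plain Func<K, V>. A minimal sketch of the resulting shape, assuming the GetOrAdd(K, Func<K, V>) overload used in this diff; the names are illustrative:

    using System;
    using System.Runtime.CompilerServices;

    internal sealed class WeakCache<K, V>
        where K : class
        where V : class
    {
        private readonly ConditionalWeakTable<K, V> _table = new();
        private readonly Func<K, V> _factory;

        public WeakCache(Func<K, V> factory) => _factory = factory;

        // No cached CreateValueCallback field: GetOrAdd accepts the factory
        // directly, and entries live only while their keys stay reachable.
        public V GetOrAdd(K key) => _table.GetOrAdd(key, _factory);
    }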
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.NativeAot.cs
index 87eb31d58022fe..38105ec0984fdb 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.NativeAot.cs
@@ -701,6 +701,13 @@ public void DisconnectTracker()
}
}
+ // Custom type instead of a value tuple to avoid rooting 'ITuple' and other value tuple stuff
+ private struct GetOrCreateComInterfaceForObjectParameters
+ {
+ public ComWrappers? This;
+ public CreateComInterfaceFlags Flags;
+ }
+
/// <summary>
/// Create a COM representation of the supplied object that can be passed to a non-managed environment.
/// </summary>
@@ -716,18 +723,12 @@ public unsafe IntPtr GetOrCreateComInterfaceForObject(object instance, CreateComInterfaceFlags flags)
{
ArgumentNullException.ThrowIfNull(instance);
- ManagedObjectWrapperHolder? managedObjectWrapper;
- if (_managedObjectWrapperTable.TryGetValue(instance, out managedObjectWrapper))
+ ManagedObjectWrapperHolder managedObjectWrapper = _managedObjectWrapperTable.GetOrAdd(instance, static (c, items) =>
{
- managedObjectWrapper.AddRef();
- return managedObjectWrapper.ComIp;
- }
-
- managedObjectWrapper = _managedObjectWrapperTable.GetValue(instance, (c) =>
- {
- ManagedObjectWrapper* value = CreateManagedObjectWrapper(c, flags);
+ ManagedObjectWrapper* value = items.This!.CreateManagedObjectWrapper(c, items.Flags);
return new ManagedObjectWrapperHolder(value, c);
- });
+ }, new GetOrCreateComInterfaceForObjectParameters { This = this, Flags = flags });
+
managedObjectWrapper.AddRef();
return managedObjectWrapper.ComIp;
}
@@ -1069,15 +1070,11 @@ private void RegisterWrapperForObject(NativeObjectWrapper wrapper, object comPro
Debug.Assert(wrapper.ProxyHandle.Target == comProxy);
Debug.Assert(wrapper.IsUniqueInstance || _rcwCache.FindProxyForComInstance(wrapper.ExternalComObject) == comProxy);
- if (s_nativeObjectWrapperTable.TryGetValue(comProxy, out NativeObjectWrapper? registeredWrapper)
- && registeredWrapper != wrapper)
- {
- Debug.Assert(registeredWrapper.ExternalComObject != wrapper.ExternalComObject);
- wrapper.Release();
- throw new NotSupportedException();
- }
+ // Add the input wrapper bound to the COM proxy, if there isn't one already. If another thread
+ // raced against this one and won, we get the wrapper that thread added instead.
+ NativeObjectWrapper registeredWrapper = s_nativeObjectWrapperTable.GetOrAdd(comProxy, wrapper);
- registeredWrapper = GetValueFromRcwTable(comProxy, wrapper);
+ // If we lost the race, we cannot register the incoming wrapper with the target object
if (registeredWrapper != wrapper)
{
Debug.Assert(registeredWrapper.ExternalComObject != wrapper.ExternalComObject);
@@ -1091,9 +1088,6 @@ private void RegisterWrapperForObject(NativeObjectWrapper wrapper, object comPro
// TrackerObjectManager and we could end up missing a section of the object graph.
// This cache deduplicates, so it is okay that the wrapper will be registered multiple times.
AddWrapperToReferenceTrackerHandleCache(registeredWrapper);
-
- // Separate out into a local function to avoid the closure and delegate allocation unless we need it.
- static NativeObjectWrapper GetValueFromRcwTable(object userObject, NativeObjectWrapper newWrapper) => s_nativeObjectWrapperTable.GetValue(userObject, _ => newWrapper);
}
private static void AddWrapperToReferenceTrackerHandleCache(NativeObjectWrapper wrapper)
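
Two details of the ComWrappers change deserve a note: a static lambda cannot capture locals, so the factory needs an explicit state argument, and that state rides in a hand-written struct because a ValueTuple would root ITuple metadata in a trimmed NativeAOT binary (per the comment above). A sketch of the same pattern against the state-taking GetOrAdd overload used in this diff; the types here are hypothetical:

    using System;
    using System.Runtime.CompilerServices;

    internal struct FactoryArgs
    {
        public int Flags; // everything the factory needs, passed without capture
    }

    internal static class WrapperCache
    {
        private static readonly ConditionalWeakTable<object, string> s_table = new();

        public static string GetOrCreate(object key, int flags)
        {
            // 'static' keeps the lambda capture-free (the compiler caches the
            // delegate once), and FactoryArgs carries the per-call state.
            return s_table.GetOrAdd(
                key,
                static (k, args) => $"{k.GetType().Name}#{args.Flags}",
                new FactoryArgs { Flags = flags });
        }
    }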
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ObjectiveCMarshal.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ObjectiveCMarshal.NativeAot.cs
index ccb969f1728011..621c62d87db7d9 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ObjectiveCMarshal.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/ObjectiveCMarshal.NativeAot.cs
@@ -136,7 +136,7 @@ private static IntPtr CreateReferenceTrackingHandleInternal(
throw new InvalidOperationException(SR.InvalidOperation_ObjectiveCTypeNoFinalizer);
}
- var trackerInfo = s_objects.GetValue(obj, static o => new ObjcTrackingInformation());
+ var trackerInfo = s_objects.GetOrAdd(obj, static o => new ObjcTrackingInformation());
trackerInfo.EnsureInitialized(obj);
trackerInfo.GetTaggedMemory(out memInSizeT, out mem);
return RuntimeImports.RhHandleAllocRefCounted(obj);
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs
index 81c9482a557580..5ae29387480839 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs
@@ -70,7 +70,7 @@ public static unsafe IntPtr GetFunctionPointerForDelegate(Delegate del)
//
// Marshalling a managed delegate created from managed code into a native function pointer
//
- return GetPInvokeDelegates().GetValue(del, s_AllocateThunk ??= AllocateThunk).Thunk;
+ return GetPInvokeDelegates().GetOrAdd(del, s_AllocateThunk ??= AllocateThunk).Thunk;
}
}
@@ -78,7 +78,7 @@ public static unsafe IntPtr GetFunctionPointerForDelegate(Delegate del)
/// <summary>
/// Used to lookup whether a delegate already has thunk allocated for it
/// </summary>
private static ConditionalWeakTable<Delegate, PInvokeDelegateThunk> s_pInvokeDelegates;
- private static ConditionalWeakTable<Delegate, PInvokeDelegateThunk>.CreateValueCallback s_AllocateThunk;
+ private static Func<Delegate, PInvokeDelegateThunk> s_AllocateThunk;
private static ConditionalWeakTable<Delegate, PInvokeDelegateThunk> GetPInvokeDelegates()
{
diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Monitor.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Monitor.NativeAot.cs
index bed275c1ea995d..4c90bbaa42ad3e 100644
--- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Monitor.NativeAot.cs
+++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Monitor.NativeAot.cs
@@ -25,14 +25,14 @@ public static partial class Monitor
#region Object->Lock/Condition mapping
private static readonly ConditionalWeakTable