diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4c593977e2c320..65ab1314d37444 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -11,7 +11,7 @@ Written by the `LLVM Team <https://llvm.org/>`_
 Introduction
 ============
 
-This document contains the release notes for the Clang C/C++/Objective-C
+This document contains the release notes for the Clang C/C++/Objective-C/OpenCL
 frontend, part of the LLVM Compiler Infrastructure, release 8.0.0. Here we
 describe the status of Clang in some detail, including major
 improvements from the previous release and new feature work. For the
@@ -41,7 +41,7 @@ Major New Features
   example, due to renaming a class or namespace).
   See the :ref:`UsersManual <profile_remapping>` for details.
 
-- Clang has new options to initialize automatic variables with either a pattern or with zeroes. The default is still that automatic variables are uninitialized. This isn't meant to change the semantics of C and C++. Rather, it's meant to be a last resort when programmers inadvertently have some undefined behavior in their code. These options aim to make undefined behavior hurt less, which security-minded people will be very happy about. Notably, this means that there's no inadvertent information leak when:
+- Clang has new options to initialize automatic variables with a pattern. The default is still that automatic variables are uninitialized. This isn't meant to change the semantics of C and C++. Rather, it's meant to be a last resort when programmers inadvertently have some undefined behavior in their code. These options aim to make undefined behavior hurt less, which security-minded people will be very happy about. Notably, this means that there's no inadvertent information leak when:
 
     * The compiler re-uses stack slots, and a value is used uninitialized.
 
@@ -65,8 +65,6 @@ Major New Features
 
     * ``-ftrivial-auto-var-init=pattern``
 
-    * ``-ftrivial-auto-var-init=zero`` ``-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang``
-
   There is also a new attribute to request a variable to not be initialized, mainly to disable initialization of large stack arrays when deemed too expensive:
 
     * ``int dont_initialize_me __attribute((uninitialized));``
@@ -212,7 +210,7 @@ Attribute Changes in Clang
 Windows Support
 ---------------
 
-- clang-cl now supports the use of the precompiled header options /Yc and /Yu
+- clang-cl now supports the use of the precompiled header options ``/Yc`` and ``/Yu``
   without the filename argument. When these options are used without the
   filename, a `#pragma hdrstop` inside the source marks the end of the
   precompiled code.
@@ -231,7 +229,8 @@ Windows Support
 
 - Allow using Address Sanitizer and Undefined Behaviour Sanitizer on MinGW.
 
-- ...
+- Structured Exception Handling support for ARM64 Windows. The ARM64 Windows
+  target is in pretty good shape now.
 
 
 C Language Changes in Clang
@@ -261,10 +260,60 @@ Objective-C Language Changes in Clang
 
 ...
 
-OpenCL C Language Changes in Clang
-----------------------------------
+OpenCL Kernel Language Changes in Clang
+---------------------------------------
+
+Misc:
+
+- Improved address space support with Clang builtins.
+
+- Improved various diagnostics for vectors with element types from extensions;
+  values used in attributes; duplicate address spaces.
+
+- Allow blocks to capture arrays.
+
+- Allow zero assignment and comparisons between variables of ``queue_t`` type.
+
+- Improved diagnostics of formatting specifiers and argument promotions for
+  vector types in ``printf``.
+
+- Fixed return type of enqueued kernel and pipe builtins.
+
+- Fixed address space of ``clk_event_t`` generated in the IR.
+
+- Fixed address space when passing/returning structs.
+
+Header file fixes:
+
+- Added missing extension guards around several builtin function overloads.
+
+- Fixed serialization support when registering vendor extensions using pragmas.
+
+- Fixed OpenCL version in declarations of builtin functions with sampler-less
+  image accesses.
+
+New vendor extensions added:
+
+- ``cl_intel_planar_yuv``
+
+- ``cl_intel_device_side_avc_motion_estimation``
+
+
+C++ for OpenCL:
+
+- Added support for address space conversions in C-style casts.
+
+- Enabled address spaces for references.
+
+- Fixed use of address spaces in templates: address space deduction and diagnostics.
+
+- Changed default address space to work with C++ specific concepts: class members,
+  template parameters, etc.
+
+- Added generic address space by default to the generated hidden 'this' parameter.
+
+- Extended overload ranking rules for address spaces.
 
-...
 
 ABI Changes in Clang
 --------------------
diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp
index 3d35d37b7db329..6b02e68965df16 100644
--- a/clang/lib/Driver/ToolChains/OpenBSD.cpp
+++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp
@@ -227,9 +227,7 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
   }
 
-  const char *Exec = Args.MakeArgString(
-      !NeedsSanitizerDeps ? ToolChain.GetLinkerPath()
-                          : ToolChain.GetProgramPath("ld.lld"));
+  const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
 }
 
diff --git a/clang/lib/Frontend/InitHeaderSearch.cpp b/clang/lib/Frontend/InitHeaderSearch.cpp
index ac3bb713ddccca..67842b5dca2968 100644
--- a/clang/lib/Frontend/InitHeaderSearch.cpp
+++ b/clang/lib/Frontend/InitHeaderSearch.cpp
@@ -433,14 +433,6 @@ void InitHeaderSearch::AddDefaultCPlusPlusIncludePaths(
   case llvm::Triple::DragonFly:
     AddPath("/usr/include/c++/5.0", CXXSystem, false);
     break;
-  case llvm::Triple::OpenBSD: {
-    std::string t = triple.getTriple();
-    if (t.substr(0, 6) == "x86_64")
-      t.replace(0, 6, "amd64");
-    AddGnuCPlusPlusIncludePaths("/usr/include/g++",
-                                t, "", "", triple);
-    break;
-  }
   case llvm::Triple::Minix:
     AddGnuCPlusPlusIncludePaths("/usr/gnu/include/c++/4.4.3",
                                 "", "", "", triple);
diff --git a/llvm/cmake/modules/CheckCompilerVersion.cmake b/llvm/cmake/modules/CheckCompilerVersion.cmake
index 47576a2b5e84c3..4dce4514a6280b 100644
--- a/llvm/cmake/modules/CheckCompilerVersion.cmake
+++ b/llvm/cmake/modules/CheckCompilerVersion.cmake
@@ -49,9 +49,6 @@ check_compiler_version("MSVC" "Visual Studio" ${MSVC_MIN} ${MSVC_SOFT_ERROR})
 
 if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
   if (CMAKE_CXX_SIMULATE_ID MATCHES "MSVC")
-    if (CMAKE_CXX_SIMULATE_VERSION VERSION_LESS MSVC_MIN)
-      message(FATAL_ERROR "Host Clang must have at least -fms-compatibility-version=${MSVC_MIN}, your version is ${CMAKE_CXX_SIMULATE_VERSION}.")
-    endif()
     set(CLANG_CL 1)
   elseif(NOT LLVM_ENABLE_LIBCXX)
     # Test that we aren't using too old of a version of libstdc++.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index d5b63d64369735..bd41f46214a35c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -122,10 +122,3 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
           (SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>;
 def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
           (SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>;
-
-// The legalizer inserts an unnecessary `and 1` to make input conform
-// to getBooleanContents, which we can lower away.
-def : Pat<(select (i32 (and I32:$cond, 1)), I32:$lhs, I32:$rhs),
-          (SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>;
-def : Pat<(select (i32 (and I32:$cond, 1)), I64:$lhs, I64:$rhs),
-          (SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>;
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5ac153244df925..fe75dbd8eff4ee 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1138,15 +1138,23 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
   if (AM.hasSymbolicDisplacement())
     return true;
 
+  bool IsRIPRelTLS = false;
   bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
+  if (IsRIPRel) {
+    SDValue Val = N.getOperand(0);
+    if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+      IsRIPRelTLS = true;
+  }
 
-  // We can't use an addressing mode in the 64-bit large code model. In the
-  // medium code model, we use can use an mode when RIP wrappers are present.
-  // That signifies access to globals that are known to be "near", such as the
-  // GOT itself.
+  // We can't use an addressing mode in the 64-bit large code model.
+  // Global TLS addressing is an exception. In the medium code model,
+  // we can use a mode when RIP wrappers are present.
+  // That signifies access to globals that are known to be "near",
+  // such as the GOT itself.
   CodeModel::Model M = TM.getCodeModel();
   if (Subtarget->is64Bit() &&
-      (M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
+      ((M == CodeModel::Large && !IsRIPRelTLS) ||
+       (M == CodeModel::Medium && !IsRIPRel)))
     return true;
 
   // Base and index reg must be 0 in order to use %rip as base.
diff --git a/llvm/test/CodeGen/WebAssembly/select.ll b/llvm/test/CodeGen/WebAssembly/select.ll
index daa934f448448e..ef18d9183e50dc 100644
--- a/llvm/test/CodeGen/WebAssembly/select.ll
+++ b/llvm/test/CodeGen/WebAssembly/select.ll
@@ -17,8 +17,10 @@ define i32 @select_i32_bool(i1 zeroext %a, i32 %b, i32 %c) {
 
 ; CHECK-LABEL: select_i32_bool_nozext:
 ; CHECK-NEXT: .functype select_i32_bool_nozext (i32, i32, i32) -> (i32){{$}}
-; SLOW-NEXT: i32.select $push0=, $1, $2, $0{{$}}
-; SLOW-NEXT: return     $pop0{{$}}
+; SLOW-NEXT: i32.const  $push0=, 1{{$}}
+; SLOW-NEXT: i32.and    $push1=, $0, $pop0{{$}}
+; SLOW-NEXT: i32.select $push2=, $1, $2, $pop1{{$}}
+; SLOW-NEXT: return     $pop2{{$}}
 define i32 @select_i32_bool_nozext(i1 %a, i32 %b, i32 %c) {
   %cond = select i1 %a, i32 %b, i32 %c
   ret i32 %cond
@@ -55,8 +57,10 @@ define i64 @select_i64_bool(i1 zeroext %a, i64 %b, i64 %c) {
 
 ; CHECK-LABEL: select_i64_bool_nozext:
 ; CHECK-NEXT: .functype select_i64_bool_nozext (i32, i64, i64) -> (i64){{$}}
-; SLOW-NEXT: i64.select $push0=, $1, $2, $0{{$}}
-; SLOW-NEXT: return     $pop0{{$}}
+; SLOW-NEXT: i32.const  $push0=, 1{{$}}
+; SLOW-NEXT: i32.and    $push1=, $0, $pop0{{$}}
+; SLOW-NEXT: i64.select $push2=, $1, $2, $pop1{{$}}
+; SLOW-NEXT: return     $pop2{{$}}
 define i64 @select_i64_bool_nozext(i1 %a, i64 %b, i64 %c) {
   %cond = select i1 %a, i64 %b, i64 %c
   ret i64 %cond
@@ -157,3 +161,16 @@ define double @select_f64_ne(i32 %a, double %b, double %c) {
   %cond = select i1 %cmp, double %b, double %c
   ret double %cond
 }
+
+; CHECK-LABEL: pr40805:
+; CHECK-NEXT: .functype pr40805 (i32, i32, i32) -> (i32){{$}}
+; SLOW-NEXT: i32.const  $push0=, 1{{$}}
+; SLOW-NEXT: i32.and    $push1=, $0, $pop0{{$}}
+; SLOW-NEXT: i32.select $push2=, $1, $2, $pop1{{$}}
+; SLOW-NEXT: return     $pop2{{$}}
+define i32 @pr40805(i32 %x, i32 %y, i32 %z) {
+  %a = and i32 %x, 1
+  %b = icmp ne i32 %a, 0
+  %c = select i1 %b, i32 %y, i32 %z
+  ret i32 %c
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-select.ll b/llvm/test/CodeGen/WebAssembly/simd-select.ll
index c871f60e6454ce..c3af6f9abe60b8 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-select.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-select.ll
@@ -29,7 +29,7 @@ define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) {
 ; CHECK-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-define <16 x i8> @select_v16i8(i1 %c, <16 x i8> %x, <16 x i8> %y) {
+define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %x, <16 x i8> %y) {
   %res = select i1 %c, <16 x i8> %x, <16 x i8> %y
   ret <16 x i8> %res
 }
@@ -99,7 +99,7 @@ define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) {
 ; CHECK-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-define <8 x i16> @select_v8i16(i1 %c, <8 x i16> %x, <8 x i16> %y) {
+define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %x, <8 x i16> %y) {
   %res = select i1 %c, <8 x i16> %x, <8 x i16> %y
   ret <8 x i16> %res
 }
@@ -170,7 +170,7 @@ define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) {
 ; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-define <4 x i32> @select_v4i32(i1 %c, <4 x i32> %x, <4 x i32> %y) {
+define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %x, <4 x i32> %y) {
   %res = select i1 %c, <4 x i32> %x, <4 x i32> %y
   ret <4 x i32> %res
 }
@@ -240,7 +240,7 @@ define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) {
 ; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-define <2 x i64> @select_v2i64(i1 %c, <2 x i64> %x, <2 x i64> %y) {
+define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %x, <2 x i64> %y) {
   %res = select i1 %c, <2 x i64> %x, <2 x i64> %y
   ret <2 x i64> %res
 }
@@ -313,7 +313,7 @@ define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) {
 ; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-define <4 x float> @select_v4f32(i1 %c, <4 x float> %x, <4 x float> %y) {
+define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %x, <4 x float> %y) {
   %res = select i1 %c, <4 x float> %x, <4 x float> %y
   ret <4 x float> %res
 }
@@ -383,7 +383,7 @@ define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y
 ; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-define <2 x double> @select_v2f64(i1 %c, <2 x double> %x, <2 x double> %y) {
+define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %x, <2 x double> %y) {
   %res = select i1 %c, <2 x double> %x, <2 x double> %y
   ret <2 x double> %res
 }
diff --git a/llvm/test/CodeGen/X86/code-model-elf.ll b/llvm/test/CodeGen/X86/code-model-elf.ll
index 56d3f4c102f0f1..f7ffd6ea1eb7c7 100644
--- a/llvm/test/CodeGen/X86/code-model-elf.ll
+++ b/llvm/test/CodeGen/X86/code-model-elf.ll
@@ -37,6 +37,8 @@ target triple = "x86_64--linux"
 @global_data = dso_local global [10 x i32] [i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
 @static_data = internal global [10 x i32] zeroinitializer, align 16
 @extern_data = external global [10 x i32], align 16
+@thread_data = external thread_local global i32, align 4
+
 
 define dso_local i32* @lea_static_data() #0 {
 ; SMALL-STATIC-LABEL: lea_static_data:
@@ -373,6 +375,70 @@ define dso_local void ()* @lea_extern_fn() #0 {
   ret void ()* @extern_fn
 }
 
+; FIXME: The result is the same for the small, medium and large models because
+; the test case specifies the PIE option and the TLS model is initial-exec.
+; For PIC code, the large model code for TLS should be emitted as below.
+
+; .L3:
+; leaq	.L3(%rip), %rbx
+; movabsq	$_GLOBAL_OFFSET_TABLE_-.L3, %r11
+; addq	%r11, %rbx
+; leaq	thread_data@TLSGD(%rip), %rdi
+; movabsq	$__tls_get_addr@PLTOFF, %rax
+; addq	%rbx, %rax
+; call	*%rax
+; movl	(%rax), %eax
+
+; The medium and small model code for pic tls should be emitted as below.
+; data16
+; leaq	thread_data@TLSGD(%rip), %rdi
+; data16
+; data16
+; rex64
+; callq	__tls_get_addr@PLT
+; movl	(%rax), %eax
+
+define dso_local i32 @load_thread_data() #0 {
+; SMALL-STATIC-LABEL: load_thread_data:
+; SMALL-STATIC:       # %bb.0:
+; SMALL-STATIC-NEXT:    movq    thread_data@GOTTPOFF(%rip), %rax
+; SMALL-STATIC-NEXT:    movl    %fs:(%rax), %eax
+; SMALL-STATIC-NEXT:    retq
+;
+; MEDIUM-STATIC-LABEL: load_thread_data:
+; MEDIUM-STATIC:       # %bb.0:
+; MEDIUM-STATIC-NEXT:    movq    thread_data@GOTTPOFF(%rip), %rax
+; MEDIUM-STATIC-NEXT:    movl    %fs:(%rax), %eax
+; MEDIUM-STATIC-NEXT:    retq
+;
+; LARGE-STATIC-LABEL: load_thread_data:
+; LARGE-STATIC:       # %bb.0:
+; LARGE-STATIC-NEXT:    movq    thread_data@GOTTPOFF(%rip), %rax
+; LARGE-STATIC-NEXT:    movl    %fs:(%rax), %eax
+; LARGE-STATIC-NEXT:    retq
+;
+; SMALL-PIC-LABEL: load_thread_data:
+; SMALL-PIC:       # %bb.0:
+; SMALL-PIC-NEXT:    movq    thread_data@GOTTPOFF(%rip), %rax
+; SMALL-PIC-NEXT:    movl    %fs:(%rax), %eax
+; SMALL-PIC-NEXT:    retq
+;
+; MEDIUM-PIC-LABEL: load_thread_data:
+; MEDIUM-PIC:       # %bb.0:
+; MEDIUM-PIC-NEXT:    movq    thread_data@GOTTPOFF(%rip), %rax
+; MEDIUM-PIC-NEXT:    movl    %fs:(%rax), %eax
+; MEDIUM-PIC-NEXT:    retq
+;
+; LARGE-PIC-LABEL: load_thread_data:
+; LARGE-PIC:       # %bb.0:
+; LARGE-PIC-NEXT:    movq    thread_data@GOTTPOFF(%rip), %rax
+; LARGE-PIC-NEXT:    movl    %fs:(%rax), %eax
+; LARGE-PIC-NEXT:    retq
+;
+  %1 = load i32, i32* @thread_data, align 4
+  ret i32 %1
+}
+
 attributes #0 = { noinline nounwind uwtable }
 
 !llvm.module.flags = !{!0, !1, !2}
diff --git a/llvm/tools/llvm-xray/xray-converter.cpp b/llvm/tools/llvm-xray/xray-converter.cpp
index 3f153b99bc9366..a682dbe53e3bd8 100644
--- a/llvm/tools/llvm-xray/xray-converter.cpp
+++ b/llvm/tools/llvm-xray/xray-converter.cpp
@@ -18,7 +18,6 @@
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/JSON.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
@@ -242,6 +241,31 @@ StackTrieNode *findOrCreateStackNode(
   return CurrentStack;
 }
 
+void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId,
+                            uint32_t TId, uint32_t PId, bool Symbolize,
+                            const FuncIdConversionHelper &FuncIdHelper,
+                            double EventTimestampUs, // written as "ts"
+                            const StackTrieNode &StackCursor, // supplies "sf"
+                            StringRef FunctionPhenotype) { // "ph" field value
+  OS << "    "; // Emits one indented JSON event object; no comma/newline.
+  if (Version >= 3) { // Version 3 and later: "pid" is taken from PId.
+    OS << llvm::formatv(
+        R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )"
+        R"("ts" : "{4:f4}", "sf" : "{5}" })",
+        (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
+                   : llvm::to_string(FuncId)), // "name": symbol or raw id
+        FunctionPhenotype, TId, PId, EventTimestampUs,
+        StackCursor.ExtraData.id);
+  } else { // Older versions: "pid" is fixed to "1"; "ts" uses f3, not f4.
+    OS << llvm::formatv(
+        R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
+        R"("ts" : "{3:f3}", "sf" : "{4}" })",
+        (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
+                   : llvm::to_string(FuncId)), // "name": symbol or raw id
+        FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id);
+  }
+}
+
 } // namespace
 
 void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
@@ -252,14 +276,18 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
 
   unsigned id_counter = 0;
 
+  OS << "{\n  \"traceEvents\": [";
   DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{};
   DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{};
   DenseMap<unsigned, StackTrieNode *> StacksByStackId{};
   std::forward_list<StackTrieNode> NodeStore{};
-
-  // Create a JSON Array which will hold all trace events.
-  json::Array TraceEvents;
+  int loop_count = 0;
   for (const auto &R : Records) {
+    if (loop_count++ == 0)
+      OS << "\n";
+    else
+      OS << ",\n";
+
     // Chrome trace event format always wants data in micros.
     // CyclesPerMicro = CycleHertz / 10^6
     // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
@@ -284,15 +312,8 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
       // type of B for begin or E for end, thread id, process id,
       // timestamp in microseconds, and a stack frame id. The ids are logged
       // in an id dictionary after the events.
-      TraceEvents.push_back(json::Object({
-          {"name", Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId)
-                             : llvm::to_string(R.FuncId)},
-          {"ph", "B"},
-          {"tid", llvm::to_string(R.TId)},
-          {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
-          {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
-          {"sf", llvm::to_string(StackCursor->ExtraData.id)},
-      }));
+      writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize,
+                             FuncIdHelper, EventTimestampUs, *StackCursor, "B");
       break;
     case RecordTypes::EXIT:
     case RecordTypes::TAIL_EXIT:
@@ -303,51 +324,43 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
       // (And/Or in loop termination below)
       StackTrieNode *PreviousCursor = nullptr;
       do {
-        TraceEvents.push_back(json::Object({
-            {"name", Symbolize
-                         ? FuncIdHelper.SymbolOrNumber(StackCursor->FuncId)
-                         : llvm::to_string(StackCursor->FuncId)},
-            {"ph", "E"},
-            {"tid", llvm::to_string(R.TId)},
-            {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
-            {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
-            {"sf", llvm::to_string(StackCursor->ExtraData.id)},
-        }));
+        if (PreviousCursor != nullptr) {
+          OS << ",\n";
+        }
+        writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId,
+                               Symbolize, FuncIdHelper, EventTimestampUs,
+                               *StackCursor, "E");
         PreviousCursor = StackCursor;
         StackCursor = StackCursor->Parent;
       } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
       break;
     }
   }
+  OS << "\n  ],\n"; // Close the Trace Events array.
+  OS << "  "
+     << "\"displayTimeUnit\": \"ns\",\n";
 
   // The stackFrames dictionary substantially reduces size of the output file by
   // avoiding repeating the entire call stack of function names for each entry.
-  json::Object StackFrames;
-  for (const auto &Stack : StacksByStackId) {
-    const auto &StackId = Stack.first;
-    const auto &StackFunctionNode = Stack.second;
-    json::Object::iterator It;
-    std::tie(It, std::ignore) = StackFrames.insert({
-        llvm::to_string(StackId),
-        json::Object{
-            {"name",
-             Symbolize ? FuncIdHelper.SymbolOrNumber(StackFunctionNode->FuncId)
-                       : llvm::to_string(StackFunctionNode->FuncId)}},
-    });
-
-    if (StackFunctionNode->Parent != nullptr)
-      It->second.getAsObject()->insert(
-          {"parent", llvm::to_string(StackFunctionNode->Parent->ExtraData.id)});
+  OS << R"(  "stackFrames": {)";
+  int stack_frame_count = 0;
+  for (auto map_iter : StacksByStackId) {
+    if (stack_frame_count++ == 0)
+      OS << "\n";
+    else
+      OS << ",\n";
+    OS << "    ";
+    OS << llvm::formatv(
+        R"("{0}" : { "name" : "{1}")", map_iter.first,
+        (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId)
+                   : llvm::to_string(map_iter.second->FuncId)));
+    if (map_iter.second->Parent != nullptr)
+      OS << llvm::formatv(R"(, "parent": "{0}")",
+                          map_iter.second->Parent->ExtraData.id);
+    OS << " }";
   }
-
-  json::Object TraceJSON{
-      {"displayTimeUnit", "ns"},
-      {"traceEvents", std::move(TraceEvents)},
-      {"stackFrames", std::move(StackFrames)},
-  };
-
-  // Pretty-print the JSON using two spaces for indentations.
-  OS << formatv("{0:2}", json::Value(std::move(TraceJSON)));
+  OS << "\n  }\n"; // Close the stack frames map.
+  OS << "}\n";     // Close the JSON entry.
 }
 
 namespace llvm {