From db668ba039041fb9b1628b4caa8f1aa9b2cde08b Mon Sep 17 00:00:00 2001 From: Jonathan Pryor Date: Wed, 21 Jun 2017 22:17:24 -0400 Subject: [PATCH] [tests] Check for emulator issues and error early The Android emulator is a "wonderful" beast. Case in point: [PR Build #1074][0], which *hung* during emulator startup, for *5 hours*: 09:13:49 Hax is enabled 09:13:49 Hax ram_size 0x0 09:13:49 HAX is working and emulator runs in fast virt mode. ... 09:13:56 adb I 06-21 09:13:56 47148 21294922 adb_io.cpp:75] readx: fd=3 wanted=4 ... # 5.5 hours later... 14:47:17 Tool /Users/builder/android-toolchain/sdk/platform-tools/adb execution finished. It "finished" because I manually killed the Job, as it had take over 6 *hours*, cumulative, to do nothing. (*Arguably* we shouldn't have a 10 hour timeout on Jenkins jobs, but Mono bumps can take upwards of 6 hours to build, so a 6 hour time isn't necessarily unusual...) This also isn't our first attempt at improving emulator reliability. See also commits c0892674, bc6440bc, 3fa9e9e9, b54f8cd2, 3b893cd4, 7450efcc, and 6358a643. (Is that enough? Likely not. Is that all of them? Probably not.) Take yet another stab at improving things: in this case, improving error checking, so that *when* an error occurs we can *fail early*, as opposed to waiting for hours on end for a Jenkins timeout to occur *or* for me to get annoyed at general Jenkins build slowness enough to track down the hung job and manually kill it... Update the `emulator` process launch so that we have event handlers for stdout and stderr messages, so that we can look for currently known error conditions, which fall into two categories: 1. HAXM needs reinstallation, or 2. Another VM is in use. HAXM appears to need reinstallation when no memory is allocated: Hax ram_size 0x0 I don't know *why* HAXM would require *re*-installation in these circumstances, but that generally appears to fix the problem. When another VM is in use, e.g. when I'm running a Veertu VM on macOS, `emulator` also fails to launch, writing the following to stderr: Failed to sync vcpu reg Failed to sync HAX vcpu contextInternal error: Initial hax sync failed If either of these conditions occur, the `` task should *fail*, with a (hopefully) relevant error message. With these changes in place, remove the `` task invocations from the `AcquireAndroidTarget` target, as the new 20 *second* timeout in `` removes the need for the first Sleep, and the 2nd sleep doesn't appear to do any good. Followup to c0892674! > in case the added sleep doesn't help, we might at least get some > more useful information We do get more information. It's not entirely *useful*: `adb` is waiting for `adbd` to provide it information, and that information never arrives: adb I 06-21 09:13:56 47148 21294922 adb_io.cpp:75] readx: fd=3 wanted=4 ...and there it waits, seemingly forever, until Job timeout or Job death occurs. [0]: https://jenkins.mono-project.com/view/Xamarin.Android/job/xamarin-android-pr-builder/1074/ --- build-tools/scripts/UnitTestApks.targets | 8 --- .../StartAndroidEmulator.cs | 66 +++++++++++++++++++ 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/build-tools/scripts/UnitTestApks.targets b/build-tools/scripts/UnitTestApks.targets index d7672235995..fdb511b4acd 100644 --- a/build-tools/scripts/UnitTestApks.targets +++ b/build-tools/scripts/UnitTestApks.targets @@ -42,10 +42,6 @@ - - { + Log.LogMessage (MessageImportance.Low, $"[emulator stdout] {e.Data}"); + if (string.IsNullOrWhiteSpace (e.Data)) + return; + if (e.Data.StartsWith ("Hax ram_size", StringComparison.Ordinal) && + e.Data.EndsWith (" 0x0", StringComparison.Ordinal)) { + Log.LogError ("Emulator failed to start: ram_size is 0MB! Please re-install HAXM."); + sawError.Set (); + } + }; + DataReceivedEventHandler error = null; + error = (o, e) => { + Log.LogMessage (MessageImportance.Low, $"[emulator stderr] {e.Data}"); + if (string.IsNullOrWhiteSpace (e.Data)) + return; + if (e.Data.StartsWith ("Failed to sync", StringComparison.Ordinal) || + e.Data.Contains ("Internal error")) { + Log.LogError ($"Emulator failed to start: {e.Data}"); + Log.LogError ($"Do you have another VM running on the machine? If so, please try exiting the VM and try again."); + sawError.Set (); + } + }; + + p.OutputDataReceived += output; + p.ErrorDataReceived += error; + p.Start (); + p.BeginOutputReadLine (); + p.BeginErrorReadLine (); + + const int Timeout = 20*1000; + int i = WaitHandle.WaitAny (new[]{sawError}, millisecondsTimeout: Timeout); + if (i == 0 || Log.HasLoggedErrors) { + p.Kill (); + return; + } + + p.CancelOutputRead (); + p.CancelErrorRead (); + + p.OutputDataReceived -= output; + p.ErrorDataReceived -= error; + + p.OutputDataReceived += WriteProcessOutputMessage; + p.ErrorDataReceived += WriteProcessErrorMessage; + + p.BeginOutputReadLine (); + p.BeginErrorReadLine (); + EmulatorProcessId = p.Id; } + + static void WriteProcessOutputMessage (object sender, DataReceivedEventArgs e) + { + Console.WriteLine (e.Data); + } + + static void WriteProcessErrorMessage (object sender, DataReceivedEventArgs e) + { + Console.Error.WriteLine (e.Data); + } } }