diff --git a/tools/profiler/core/platform-linux-android.cpp b/tools/profiler/core/platform-linux-android.cpp index 11af93456c08..8e93ca4e919a 100644 --- a/tools/profiler/core/platform-linux-android.cpp +++ b/tools/profiler/core/platform-linux-android.cpp @@ -551,7 +551,11 @@ SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, } SamplerThread::~SamplerThread() { - pthread_join(mThread, nullptr); + if (pthread_equal(mThread, pthread_self())) { + pthread_detach(mThread); + } else { + pthread_join(mThread, nullptr); + } // Just in the unlikely case some callbacks were added between the end of the // thread and now. InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList), diff --git a/tools/profiler/core/platform-macos.cpp b/tools/profiler/core/platform-macos.cpp index 78f000c47061..ad0b0699f372 100644 --- a/tools/profiler/core/platform-macos.cpp +++ b/tools/profiler/core/platform-macos.cpp @@ -256,7 +256,11 @@ SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration, } SamplerThread::~SamplerThread() { - pthread_join(mThread, nullptr); + if (pthread_equal(mThread, pthread_self())) { + pthread_detach(mThread); + } else { + pthread_join(mThread, nullptr); + } // Just in the unlikely case some callbacks were added between the end of the // thread and now. InvokePostSamplingCallbacks(std::move(mPostSamplingCallbackList), diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp index 51d43a17e587..bc047a631b16 100644 --- a/tools/profiler/core/platform.cpp +++ b/tools/profiler/core/platform.cpp @@ -42,7 +42,12 @@ #include "ProfilerIOInterposeObserver.h" #include "ProfilerParent.h" #include "ProfilerRustBindings.h" +#include "mozilla/Assertions.h" +#include "mozilla/Maybe.h" #include "mozilla/MozPromise.h" +#include "nsCOMPtr.h" +#include "nsDebug.h" +#include "nsXPCOM.h" #include "shared-libraries.h" #include "VTuneProfiler.h" #include "ETWTools.h" @@ -95,6 +100,7 @@ #include "nsSystemInfo.h" #include "nsThreadUtils.h" #include "nsXULAppAPI.h" +#include "nsDirectoryServiceUtils.h" #include "Tracing.h" #include "prdtoa.h" #include "prtime.h" @@ -108,6 +114,18 @@ #include #include +// The signals that we use to control the profiler conflict with the signals +// used to control the code coverage tool. Therefore, if coverage is enabled, we +// need to disable our own signal handling mechanisms. +#ifndef MOZ_CODE_COVERAGE +# ifdef XP_WIN +// TODO: Add support for windows "signal"-like behaviour. See Bug 1867328. +# else +# include +# include +# endif +#endif + #if defined(GP_OS_android) # include "JavaExceptions.h" # include "mozilla/java/GeckoJavaSamplerNatives.h" @@ -234,6 +252,10 @@ ProfileChunkedBuffer& profiler_get_core_buffer() { mozilla::Atomic gSkipSampling; +// Atomic flag to stop the profiler from within the sampling loop +mozilla::Atomic gStopAndDumpFromSignal( + false); + #if defined(GP_OS_android) class GeckoJavaSampler : public java::GeckoJavaSampler::Natives { @@ -647,6 +669,7 @@ class CorePS { PS_GET_AND_SET(const nsACString&, ProcessName) PS_GET_AND_SET(const nsACString&, ETLDplus1) + PS_GET_AND_SET(const Maybe>&, DownloadDirectory) static void SetBandwidthCounter(ProfilerBandwidthCounter* aBandwidthCounter) { MOZ_ASSERT(sInstance); @@ -695,6 +718,9 @@ class CorePS { // lock, so it is safe to have only one instance allocated for all of the // threads. JsFrameBuffer mJsFrames; + + // Cached download directory for when we need to dump profiles to disk. + Maybe> mDownloadDirectory; }; CorePS* CorePS::sInstance = nullptr; @@ -2932,16 +2958,6 @@ static void StreamMetaJSCustomObject( ActivePS::WriteActiveConfiguration(aLock, aWriter, MakeStringSpan("configuration")); - if (!NS_IsMainThread()) { - // Leave the rest of the properties out if we're not on the main thread. - // At the moment, the only case in which this function is called on a - // background thread is if we're in a content process and are going to - // send this profile to the parent process. In that case, the parent - // process profile's "meta" object already has the rest of the properties, - // and the parent process profile is dumped on that process's main thread. - return; - } - aWriter.DoubleProperty("interval", ActivePS::Interval(aLock)); aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock)); @@ -3018,6 +3034,16 @@ static void StreamMetaJSCustomObject( } aWriter.EndObject(); + if (!NS_IsMainThread()) { + // Leave the rest of the properties out if we're not on the main thread. + // At the moment, the only case in which this function is called on a + // background thread is if we're in a content process and are going to + // send this profile to the parent process. In that case, the parent + // process profile's "meta" object already has the rest of the properties, + // and the parent process profile is dumped on that process's main thread. + return; + } + // We should avoid collecting extension metadata for profiler when there is no // observer service, since a ExtensionPolicyService could not be created then. if (nsCOMPtr os = services::GetObserverService()) { @@ -4076,6 +4102,10 @@ static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, return new SamplerThread(aLock, aGeneration, aInterval, aFeatures); } +// Forward declare the function to call when we need to dump + stop from within +// the sampler thread +void profiler_dump_and_stop(); + // This function is the sampler thread. This implementation is used for all // targets. void SamplerThread::Run() { @@ -4731,6 +4761,27 @@ void SamplerThread::Run() { scheduledSampleStart = beforeSleep + sampleInterval; SleepMicro(static_cast(sampleInterval.ToMicroseconds())); } + + // Check to see if the hard-reset flag has been set to stop the profiler. + // This should only be used on the worst occasions when we need to stop the + // profiler from within the sampling thread (e.g. if the main thread is + // stuck) We need to do this here as it is outside of the scope of the lock. + // Otherwise we'll encounter a race condition where `profiler_stop` tries to + // get the lock that we already hold. We also need to wait until after we + // have carried out post sampling callbacks, as otherwise we may reach a + // situation where another part of the program is waiting for us to finish + // sampling, but we have ended early! + if (gStopAndDumpFromSignal) { + // Reset the flag in case we restart the profiler at a later point + gStopAndDumpFromSignal = false; + // dump the profile, and stop the profiler + profiler_dump_and_stop(); + // profiler_stop will try to destroy the active sampling thread. This will + // also destroy some data structures that are used further down this + // function, leading to invalid accesses. We therefore exit the function + // directly, rather than breaking from the loop. + return; + } } // End of `while` loop. We can only be here from a `break` inside the loop. @@ -5239,6 +5290,102 @@ static const char* get_size_suffix(const char* str) { return ptr; } +#if !defined(XP_WIN) && !defined(MOZ_CODE_COVERAGE) +static void profiler_stop_signal_handler(int signal, siginfo_t* info, + void* context) { + // We cannot really do any logging here, as this is a signal handler. + // Signal handlers are limited in what functions they can call, for more + // details see: https://man7.org/linux/man-pages/man7/signal-safety.7.html + // Writing to a file is allowed, but as signal handlers are also limited in + // how long they can run, we instead set an atomic variable to true to trigger + // the sampling thread to stop and dump the data in the profiler. + gStopAndDumpFromSignal = true; +} +#endif + +// This may fail if we have previously had an issue finding the download +// directory, or if the directory has moved since we cached the path. +// This is non-ideal, but captured by Bug 1885000 +Maybe profiler_find_dump_path() { + Maybe> directory = Nothing(); + nsAutoCString path; + + { + // Acquire the lock so that we can get things from CorePS + PSAutoLock lock; + Maybe> downloadDir = Nothing(); + downloadDir = CorePS::DownloadDirectory(lock); + + // This needs to be done within the context of the lock, as otherwise + // another thread might modify CorePS::mDownloadDirectory while we're + // cloning the pointer. + if (downloadDir) { + nsCOMPtr d; + downloadDir.value()->Clone(getter_AddRefs(d)); + directory = Some(d); + } else { + return Nothing(); + } + } + + // Now, we can check to see if we have a directory, and use it to construct + // the output file + if (directory) { + // Set up the name of our profile file + path.AppendPrintf("profile_%i_%i.json", XRE_GetProcessType(), getpid()); + + // Append it to the directory we found + nsresult rv = directory.value()->AppendNative(path); + if (NS_FAILED(rv)) { + LOG("Failed to append path to profile file"); + return Nothing(); + } + +// Write the result *back* to the original path +#if defined(XP_WIN) + rv = directory.value()->GetNativeTarget(path); +#else + rv = directory.value()->GetNativePath(path); +#endif + if (NS_FAILED(rv)) { + LOG("Failed to get native path for temp path"); + return Nothing(); + } + + return Some(path); + } + + return Nothing(); +} + +void profiler_dump_and_stop() { + // pause the profiler until we are done dumping + profiler_pause(); + + // Try to save the profile to a file + if (auto path = profiler_find_dump_path()) { + profiler_save_profile_to_file(path.value().get()); + } else { + LOG("Failed to dump profile to disk"); + } + + // Stop the profiler + profiler_stop(); +} + +void profiler_init_signal_handlers() { +#if !defined(XP_WIN) && !defined(MOZ_CODE_COVERAGE) + // Set a handler to stop the profiler + struct sigaction prof_stop_sa {}; + memset(&prof_stop_sa, 0, sizeof(struct sigaction)); + prof_stop_sa.sa_sigaction = profiler_stop_signal_handler; + prof_stop_sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&prof_stop_sa.sa_mask); + DebugOnly rstop = sigaction(SIGUSR2, &prof_stop_sa, nullptr); + MOZ_ASSERT(rstop == 0, "Failed to install Profiler SIGUSR2 handler"); +#endif +} + void profiler_init(void* aStackTop) { LOG("profiler_init"); @@ -5288,10 +5435,12 @@ void profiler_init(void* aStackTop) { locked_register_thread(lock, offThreadRef); } } - // Platform-specific initialization. PlatformInit(lock); + // Initialise the signal handlers needed to start/stop the profiler + profiler_init_signal_handlers(); + #if defined(GP_OS_android) if (jni::IsAvailable()) { GeckoJavaSampler::Init(); @@ -6308,6 +6457,32 @@ bool profiler_is_paused() { return ActivePS::AppendPostSamplingCallback(lock, std::move(aCallback)); } +// See `ProfilerControl.h` for more details. +void profiler_lookup_download_directory() { + LOG("profiler_lookup_download_directory"); + + MOZ_ASSERT( + NS_IsMainThread(), + "We can only get access to the directory service from the main thread"); + + // Make sure the profiler is actually running~ + MOZ_RELEASE_ASSERT(CorePS::Exists()); + + // take the lock so that we can write to CorePS + PSAutoLock lock; + + nsCOMPtr tDownloadDir; + nsresult rv = NS_GetSpecialDirectory(NS_OS_DEFAULT_DOWNLOAD_DIR, + getter_AddRefs(tDownloadDir)); + if (NS_FAILED(rv)) { + LOG("Failed to find download directory. Profiler signal handling will not " + "be able to save to disk. Error: %s", + GetStaticErrorName(rv)); + } else { + CorePS::SetDownloadDirectory(lock, Some(tDownloadDir)); + } +} + RefPtr profiler_pause() { LOG("profiler_pause"); diff --git a/tools/profiler/public/ProfilerControl.h b/tools/profiler/public/ProfilerControl.h index 466d15eb69ba..3ee44707c755 100644 --- a/tools/profiler/public/ProfilerControl.h +++ b/tools/profiler/public/ProfilerControl.h @@ -123,6 +123,18 @@ void profiler_ensure_started( const char** aFilters, uint32_t aFilterCount, uint64_t aActiveTabID, const mozilla::Maybe& aDuration = mozilla::Nothing()); +// Tell the profiler to look up the download directory for writing profiles. +// With some features, such as signal control, we need to know the location of +// a directory where we can save profiles to disk. Because we start the +// profiler before we start the directory service, we can't access the +// download directory at profiler startup. Similarly, when we need to get the +// directory, we often can't, as we're running in non-main-thread contexts +// that don't have access to the directory service. This function gives us a +// third option, by giving us a hook to look for the download directory when +// the time is right. This might be triggered internally (e.g. when we start +// profiling), or externally, e.g. after the directory service is initialised. +void profiler_lookup_download_directory(); + //--------------------------------------------------------------------------- // Control the profiler //--------------------------------------------------------------------------- diff --git a/tools/profiler/tests/xpcshell/test_feature_posix_signals.js b/tools/profiler/tests/xpcshell/test_feature_posix_signals.js new file mode 100644 index 000000000000..28fbf890e818 --- /dev/null +++ b/tools/profiler/tests/xpcshell/test_feature_posix_signals.js @@ -0,0 +1,194 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +ChromeUtils.defineESModuleGetters(this, { + Downloads: "resource://gre/modules/Downloads.sys.mjs", + FileUtils: "resource://gre/modules/FileUtils.sys.mjs", + BrowserTestUtils: "resource://testing-common/BrowserTestUtils.sys.mjs", +}); + +const { ctypes } = ChromeUtils.importESModule( + "resource://gre/modules/ctypes.sys.mjs" +); + +// Derived from functionality in js/src/devtools/rootAnalysis/utility.js +function openLibrary(names) { + for (const name of names) { + try { + return ctypes.open(name); + } catch (e) {} + } + return undefined; +} + +// Derived heavily from equivalent sandbox testing code. +// For more details see: +// https://searchfox.org/mozilla-central/rev/1aaacaeb4fa3aca6837ecc157e43e947229ba8ce/security/sandbox/test/browser_content_sandbox_utils.js#89 +function raiseSignal(pid, sig) { + try { + const libc = openLibrary([ + "libc.so.6", + "libc.so", + "libc.dylib", + "libSystem.B.dylib", + ]); + if (!libc) { + info("Failed to open any libc shared object"); + return { ok: false }; + } + + // c.f. https://man7.org/linux/man-pages/man2/kill.2.html + // This choice of typing for `pid` is complex, and brittle, as it's + // platform dependent. Getting it wrong can result in incoreect + // generation/calling of the `kill` function. Unfortunately, as it's + // defined as `pid_t` in a header, we can't easily get access to it. + // For now, we just use an integer, and hope that the system int size + // aligns with the `pid_t` size. + const kill = libc.declare( + "kill", + ctypes.default_abi, + ctypes.int, // return value + ctypes.int32_t, // pid + ctypes.int // sig + ); + + let kres = kill(pid, sig); + if (kres != 0) { + info(`Kill returned a non-zero result ${kres}.`); + return { ok: false }; + } + + libc.close(); + } catch (e) { + info(`Exception ${e} thrown while trying to call kill`); + return { ok: false }; + } + + return { ok: true }; +} + +// We would like to use the following to wait for a stop signal to actually be +// handled: +// await Services.profiler.waitOnePeriodicSampling(); +// However, as we are trying to shut down the profiler using the sampler +// thread, this can cause complications between the callback waiting for the +// sampling to be over, and the sampler thread actually finishing. +// Instead, we use the BrowserTestUtils.waitForCondition to wait until the +// profiler is no longer active. +async function waitUntilProfilerStopped(interval = 1000, maxTries = 100) { + await BrowserTestUtils.waitForCondition( + () => !Services.profiler.IsActive(), + "the profiler should be inactive", + interval, + maxTries + ); +} + +async function cleanupAfterTest() { + // We need to cleanup written profiles after a test + // Get the system downloads directory, and use it to build a profile file + let profile = FileUtils.File(await Downloads.getSystemDownloadsDirectory()); + + // Get the process ID + let pid = Services.appinfo.processID; + + // write it to the profile file name + profile.append(`profile_0_${pid}.json`); + + // remove the file! + await IOUtils.remove(profile.path, { ignoreAbsent: true }); + + // Make sure the profiler is fully stopped, even if the test failed + await Services.profiler.StopProfiler(); +} + +// Hardcode the constants SIGUSR1 and SIGUSR2. +// This is an absolutely terrible idea, as they are implementation defined! +// However, it turns out that for 99% of the platforms we care about, and for +// 99.999% of the platforms we test, these constants are, well, constant. +// Additionally, these constants are only for _testing_ the signal handling +// feature - the actual feature relies on platform specific definitions. This +// may cause a mismatch if we test on on, say, a gnu hurd kernel, or on a +// linux kernel running on sparc, but the feature will not break - only +// the testing. +// const SIGUSR1 = Services.appinfo.OS === "Darwin" ? 30 : 10; +const SIGUSR2 = Services.appinfo.OS === "Darwin" ? 31 : 12; + +add_task(async () => { + info("Test that stopping the profiler with a posix signal works."); + registerCleanupFunction(cleanupAfterTest); + + Assert.ok( + !Services.profiler.IsActive(), + "The profiler should not begin the test active." + ); + + const entries = 100; + const interval = 1; + const threads = []; + const features = []; + + // Start the profiler, and ensure that it's active + await Services.profiler.StartProfiler(entries, interval, threads, features); + Assert.ok(Services.profiler.IsActive(), "The profiler should now be active."); + + // Get the process ID + let pid = Services.appinfo.processID; + + // Try and stop the profiler using a signal. + let result = raiseSignal(pid, SIGUSR2); + Assert.ok(result, "Raising a SIGUSR2 signal should succeed."); + + await waitUntilProfilerStopped(); + + do_test_finished(); +}); + +add_task(async () => { + info( + "Test that stopping the profiler with a posix signal writes a profile file to the system download directory." + ); + registerCleanupFunction(cleanupAfterTest); + + Assert.ok( + !Services.profiler.IsActive(), + "The profiler should not begin the test active." + ); + + const entries = 100; + const interval = 1; + const threads = []; + const features = []; + + // Get the system downloads directory, and use it to build a profile file + let profile = FileUtils.File(await Downloads.getSystemDownloadsDirectory()); + + // Get the process ID + let pid = Services.appinfo.processID; + + // use the pid to construct the name of the profile, and resulting file + profile.append(`profile_0_${pid}.json`); + + // Start the profiler, and ensure that it's active + await Services.profiler.StartProfiler(entries, interval, threads, features); + Assert.ok(Services.profiler.IsActive(), "The profiler should now be active."); + + // Try and stop the profiler using a signal. + let result = raiseSignal(pid, SIGUSR2); + Assert.ok(result, "Raising a SIGUSR2 signal should succeed."); + + // Wait for the file to exist + await BrowserTestUtils.waitForCondition( + async () => await IOUtils.exists(profile.path), + "Waiting for a profile file to be written to disk." + ); + + await waitUntilProfilerStopped(); + Assert.ok( + !Services.profiler.IsActive(), + "The profiler should now be inactive." + ); + + do_test_finished(); +}); diff --git a/tools/profiler/tests/xpcshell/xpcshell.toml b/tools/profiler/tests/xpcshell/xpcshell.toml index 5c094899a4f5..2cde39d09ff3 100644 --- a/tools/profiler/tests/xpcshell/xpcshell.toml +++ b/tools/profiler/tests/xpcshell/xpcshell.toml @@ -24,9 +24,6 @@ skip-if = ["!debug"] ["test_feature_fileioall.js"] skip-if = ["release_or_beta"] -# The sanitizer checks appears to overwrite our own memory hooks in xpcshell tests, -# and no allocation markers are gathered. Skip this test in that configuration. - ["test_feature_java.js"] skip-if = ["os != 'android'"] @@ -50,6 +47,12 @@ skip-if = [ "socketprocess_networking", ] +["test_feature_posix_signals.js"] +skip-if = [ + "ccov", + "os == 'win'", +] + # Native stackwalking is somewhat unreliable depending on the platform. # # We don't have frame pointers on macOS release and beta, so stack walking does not @@ -61,6 +64,9 @@ skip-if = [ # For sanitizer builds, there were many intermittents, and we're not getting much # additional coverage there, so it's better to be a bit more reliable. +# The sanitizer checks appears to overwrite our own memory hooks in xpcshell tests, +# and no allocation markers are gathered. Skip this test in that configuration. + ["test_feature_stackwalking.js"] skip-if = [ "os == 'mac' && release_or_beta", diff --git a/xpcom/build/XPCOMInit.cpp b/xpcom/build/XPCOMInit.cpp index fe715eff6382..6275ca9c53d9 100644 --- a/xpcom/build/XPCOMInit.cpp +++ b/xpcom/build/XPCOMInit.cpp @@ -325,6 +325,8 @@ NS_InitXPCOM(nsIServiceManager** aResult, nsIFile* aBinDirectory, if (NS_WARN_IF(NS_FAILED(rv))) { return rv; } + + // Initialise the profiler AUTO_PROFILER_INIT2; // Set up the timer globals/timer thread @@ -464,6 +466,11 @@ NS_InitXPCOM(nsIServiceManager** aResult, nsIFile* aBinDirectory, // to the directory service. nsDirectoryService::gService->RegisterCategoryProviders(); + // Now that both the profiler and directory services have been started + // we can find the download directory, where the profiler can write + // profiles if necessary + profiler_lookup_download_directory(); + // Init mozilla::SharedThreadPool (which needs the service manager). mozilla::SharedThreadPool::InitStatics();