From b62f9595286d322e0a78daa73f58921c23a51d03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=B0=8F=E7=99=BD?= <364772080@qq.com> Date: Wed, 13 Aug 2025 02:28:47 +0800 Subject: [PATCH] windows: Fix message loop using too much CPU (#35969) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #34374 This is a leftover issue from #34374. Back in #34374, I wanted to use DirectX to handle vsync, after all, that’s how 99% of Windows apps do it. But after discussing with @maxbrunsfeld , we decided to stick with the original vsync approach given gpui’s architecture. In my tests, there’s no noticeable performance difference between this PR’s approach and DirectX vsync. That said, this PR’s method does have a theoretical advantage, it doesn’t block the main thread while waiting for vsync. The only difference is that in this PR, on Windows 11 we use a newer API instead of `DwmFlush`, since Chrome’s tests have shown that `DwmFlush` has some problems. This PR also removes the use of `MsgWaitForMultipleObjects`. Release Notes: - N/A --------- Co-authored-by: Max Brunsfeld --- Cargo.toml | 1 + crates/gpui/src/platform/windows.rs | 2 + .../src/platform/windows/directx_renderer.rs | 27 +-- crates/gpui/src/platform/windows/platform.rs | 67 +++---- crates/gpui/src/platform/windows/util.rs | 24 ++- crates/gpui/src/platform/windows/vsync.rs | 174 ++++++++++++++++++ crates/gpui/src/platform/windows/wrapper.rs | 30 ++- 7 files changed, 269 insertions(+), 56 deletions(-) create mode 100644 crates/gpui/src/platform/windows/vsync.rs diff --git a/Cargo.toml b/Cargo.toml index 48a11c27da..dd14078dd2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -714,6 +714,7 @@ features = [ "Win32_System_LibraryLoader", "Win32_System_Memory", "Win32_System_Ole", + "Win32_System_Performance", "Win32_System_Pipes", "Win32_System_SystemInformation", "Win32_System_SystemServices", diff --git a/crates/gpui/src/platform/windows.rs b/crates/gpui/src/platform/windows.rs index 5268d3ccba..77e0ca41bf 100644 --- a/crates/gpui/src/platform/windows.rs +++ b/crates/gpui/src/platform/windows.rs @@ -10,6 +10,7 @@ mod keyboard; mod platform; mod system_settings; mod util; +mod vsync; mod window; mod wrapper; @@ -25,6 +26,7 @@ pub(crate) use keyboard::*; pub(crate) use platform::*; pub(crate) use system_settings::*; pub(crate) use util::*; +pub(crate) use vsync::*; pub(crate) use window::*; pub(crate) use wrapper::*; diff --git a/crates/gpui/src/platform/windows/directx_renderer.rs b/crates/gpui/src/platform/windows/directx_renderer.rs index 585b1dab1c..4e72ded534 100644 --- a/crates/gpui/src/platform/windows/directx_renderer.rs +++ b/crates/gpui/src/platform/windows/directx_renderer.rs @@ -4,16 +4,15 @@ use ::util::ResultExt; use anyhow::{Context, Result}; use windows::{ Win32::{ - Foundation::{FreeLibrary, HMODULE, HWND}, + Foundation::{HMODULE, HWND}, Graphics::{ Direct3D::*, Direct3D11::*, DirectComposition::*, Dxgi::{Common::*, *}, }, - System::LibraryLoader::LoadLibraryA, }, - core::{Interface, PCSTR}, + core::Interface, }; use crate::{ @@ -208,7 +207,7 @@ impl DirectXRenderer { fn present(&mut self) -> Result<()> { unsafe { - let result = self.resources.swap_chain.Present(1, DXGI_PRESENT(0)); + let result = self.resources.swap_chain.Present(0, DXGI_PRESENT(0)); // Presenting the swap chain can fail if the DirectX device was removed or reset. if result == DXGI_ERROR_DEVICE_REMOVED || result == DXGI_ERROR_DEVICE_RESET { let reason = self.devices.device.GetDeviceRemovedReason(); @@ -1619,22 +1618,6 @@ pub(crate) mod shader_resources { } } -fn with_dll_library(dll_name: PCSTR, f: F) -> Result -where - F: FnOnce(HMODULE) -> Result, -{ - let library = unsafe { - LoadLibraryA(dll_name).with_context(|| format!("Loading dll: {}", dll_name.display()))? - }; - let result = f(library); - unsafe { - FreeLibrary(library) - .with_context(|| format!("Freeing dll: {}", dll_name.display())) - .log_err(); - } - result -} - mod nvidia { use std::{ ffi::CStr, @@ -1644,7 +1627,7 @@ mod nvidia { use anyhow::Result; use windows::{Win32::System::LibraryLoader::GetProcAddress, core::s}; - use crate::platform::windows::directx_renderer::with_dll_library; + use crate::with_dll_library; // https://github.com/NVIDIA/nvapi/blob/7cb76fce2f52de818b3da497af646af1ec16ce27/nvapi_lite_common.h#L180 const NVAPI_SHORT_STRING_MAX: usize = 64; @@ -1711,7 +1694,7 @@ mod amd { use anyhow::Result; use windows::{Win32::System::LibraryLoader::GetProcAddress, core::s}; - use crate::platform::windows::directx_renderer::with_dll_library; + use crate::with_dll_library; // https://github.com/GPUOpen-LibrariesAndSDKs/AGS_SDK/blob/5d8812d703d0335741b6f7ffc37838eeb8b967f7/ags_lib/inc/amd_ags.h#L145 const AGS_CURRENT_VERSION: i32 = (6 << 22) | (3 << 12); diff --git a/crates/gpui/src/platform/windows/platform.rs b/crates/gpui/src/platform/windows/platform.rs index 01b043a755..9e5d359e43 100644 --- a/crates/gpui/src/platform/windows/platform.rs +++ b/crates/gpui/src/platform/windows/platform.rs @@ -32,7 +32,7 @@ use crate::*; pub(crate) struct WindowsPlatform { state: RefCell, - raw_window_handles: RwLock>, + raw_window_handles: Arc>>, // The below members will never change throughout the entire lifecycle of the app. icon: HICON, main_receiver: flume::Receiver, @@ -114,7 +114,7 @@ impl WindowsPlatform { }; let icon = load_icon().unwrap_or_default(); let state = RefCell::new(WindowsPlatformState::new()); - let raw_window_handles = RwLock::new(SmallVec::new()); + let raw_window_handles = Arc::new(RwLock::new(SmallVec::new())); let windows_version = WindowsVersion::new().context("Error retrieve windows version")?; Ok(Self { @@ -134,22 +134,12 @@ impl WindowsPlatform { }) } - fn redraw_all(&self) { - for handle in self.raw_window_handles.read().iter() { - unsafe { - RedrawWindow(Some(*handle), None, None, RDW_INVALIDATE | RDW_UPDATENOW) - .ok() - .log_err(); - } - } - } - pub fn window_from_hwnd(&self, hwnd: HWND) -> Option> { self.raw_window_handles .read() .iter() - .find(|entry| *entry == &hwnd) - .and_then(|hwnd| window_from_hwnd(*hwnd)) + .find(|entry| entry.as_raw() == hwnd) + .and_then(|hwnd| window_from_hwnd(hwnd.as_raw())) } #[inline] @@ -158,7 +148,7 @@ impl WindowsPlatform { .read() .iter() .for_each(|handle| unsafe { - PostMessageW(Some(*handle), message, wparam, lparam).log_err(); + PostMessageW(Some(handle.as_raw()), message, wparam, lparam).log_err(); }); } @@ -166,7 +156,7 @@ impl WindowsPlatform { let mut lock = self.raw_window_handles.write(); let index = lock .iter() - .position(|handle| *handle == target_window) + .position(|handle| handle.as_raw() == target_window) .unwrap(); lock.remove(index); @@ -226,19 +216,19 @@ impl WindowsPlatform { } } - // Returns true if the app should quit. - fn handle_events(&self) -> bool { + // Returns if the app should quit. + fn handle_events(&self) { let mut msg = MSG::default(); unsafe { - while PeekMessageW(&mut msg, None, 0, 0, PM_REMOVE).as_bool() { + while GetMessageW(&mut msg, None, 0, 0).as_bool() { match msg.message { - WM_QUIT => return true, + WM_QUIT => return, WM_INPUTLANGCHANGE | WM_GPUI_CLOSE_ONE_WINDOW | WM_GPUI_TASK_DISPATCHED_ON_MAIN_THREAD | WM_GPUI_DOCK_MENU_ACTION => { if self.handle_gpui_evnets(msg.message, msg.wParam, msg.lParam, &msg) { - return true; + return; } } _ => { @@ -247,7 +237,6 @@ impl WindowsPlatform { } } } - false } // Returns true if the app should quit. @@ -315,8 +304,28 @@ impl WindowsPlatform { self.raw_window_handles .read() .iter() - .find(|&&hwnd| hwnd == active_window_hwnd) - .copied() + .find(|hwnd| hwnd.as_raw() == active_window_hwnd) + .map(|hwnd| hwnd.as_raw()) + } + + fn begin_vsync_thread(&self) { + let all_windows = Arc::downgrade(&self.raw_window_handles); + std::thread::spawn(move || { + let vsync_provider = VSyncProvider::new(); + loop { + vsync_provider.wait_for_vsync(); + let Some(all_windows) = all_windows.upgrade() else { + break; + }; + for hwnd in all_windows.read().iter() { + unsafe { + RedrawWindow(Some(hwnd.as_raw()), None, None, RDW_INVALIDATE) + .ok() + .log_err(); + } + } + } + }); } } @@ -347,12 +356,8 @@ impl Platform for WindowsPlatform { fn run(&self, on_finish_launching: Box) { on_finish_launching(); - loop { - if self.handle_events() { - break; - } - self.redraw_all(); - } + self.begin_vsync_thread(); + self.handle_events(); if let Some(ref mut callback) = self.state.borrow_mut().callbacks.quit { callback(); @@ -445,7 +450,7 @@ impl Platform for WindowsPlatform { ) -> Result> { let window = WindowsWindow::new(handle, options, self.generate_creation_info())?; let handle = window.get_raw_handle(); - self.raw_window_handles.write().push(handle); + self.raw_window_handles.write().push(handle.into()); Ok(Box::new(window)) } diff --git a/crates/gpui/src/platform/windows/util.rs b/crates/gpui/src/platform/windows/util.rs index 5fb8febe3b..af71dfe4a1 100644 --- a/crates/gpui/src/platform/windows/util.rs +++ b/crates/gpui/src/platform/windows/util.rs @@ -1,14 +1,18 @@ use std::sync::OnceLock; use ::util::ResultExt; +use anyhow::Context; use windows::{ UI::{ Color, ViewManagement::{UIColorType, UISettings}, }, Wdk::System::SystemServices::RtlGetVersion, - Win32::{Foundation::*, Graphics::Dwm::*, UI::WindowsAndMessaging::*}, - core::{BOOL, HSTRING}, + Win32::{ + Foundation::*, Graphics::Dwm::*, System::LibraryLoader::LoadLibraryA, + UI::WindowsAndMessaging::*, + }, + core::{BOOL, HSTRING, PCSTR}, }; use crate::*; @@ -197,3 +201,19 @@ pub(crate) fn show_error(title: &str, content: String) { ) }; } + +pub(crate) fn with_dll_library(dll_name: PCSTR, f: F) -> Result +where + F: FnOnce(HMODULE) -> Result, +{ + let library = unsafe { + LoadLibraryA(dll_name).with_context(|| format!("Loading dll: {}", dll_name.display()))? + }; + let result = f(library); + unsafe { + FreeLibrary(library) + .with_context(|| format!("Freeing dll: {}", dll_name.display())) + .log_err(); + } + result +} diff --git a/crates/gpui/src/platform/windows/vsync.rs b/crates/gpui/src/platform/windows/vsync.rs new file mode 100644 index 0000000000..09dbfd0231 --- /dev/null +++ b/crates/gpui/src/platform/windows/vsync.rs @@ -0,0 +1,174 @@ +use std::{ + sync::LazyLock, + time::{Duration, Instant}, +}; + +use anyhow::{Context, Result}; +use util::ResultExt; +use windows::{ + Win32::{ + Foundation::{HANDLE, HWND}, + Graphics::{ + DirectComposition::{ + COMPOSITION_FRAME_ID_COMPLETED, COMPOSITION_FRAME_ID_TYPE, COMPOSITION_FRAME_STATS, + COMPOSITION_TARGET_ID, + }, + Dwm::{DWM_TIMING_INFO, DwmFlush, DwmGetCompositionTimingInfo}, + }, + System::{ + LibraryLoader::{GetModuleHandleA, GetProcAddress}, + Performance::QueryPerformanceFrequency, + Threading::INFINITE, + }, + }, + core::{HRESULT, s}, +}; + +static QPC_TICKS_PER_SECOND: LazyLock = LazyLock::new(|| { + let mut frequency = 0; + // On systems that run Windows XP or later, the function will always succeed and + // will thus never return zero. + unsafe { QueryPerformanceFrequency(&mut frequency).unwrap() }; + frequency as u64 +}); + +const VSYNC_INTERVAL_THRESHOLD: Duration = Duration::from_millis(1); +const DEFAULT_VSYNC_INTERVAL: Duration = Duration::from_micros(16_666); // ~60Hz + +// Here we are using dynamic loading of DirectComposition functions, +// or the app will refuse to start on windows systems that do not support DirectComposition. +type DCompositionGetFrameId = + unsafe extern "system" fn(frameidtype: COMPOSITION_FRAME_ID_TYPE, frameid: *mut u64) -> HRESULT; +type DCompositionGetStatistics = unsafe extern "system" fn( + frameid: u64, + framestats: *mut COMPOSITION_FRAME_STATS, + targetidcount: u32, + targetids: *mut COMPOSITION_TARGET_ID, + actualtargetidcount: *mut u32, +) -> HRESULT; +type DCompositionWaitForCompositorClock = + unsafe extern "system" fn(count: u32, handles: *const HANDLE, timeoutinms: u32) -> u32; + +pub(crate) struct VSyncProvider { + interval: Duration, + f: Box bool>, +} + +impl VSyncProvider { + pub(crate) fn new() -> Self { + if let Some((get_frame_id, get_statistics, wait_for_comp_clock)) = + initialize_direct_composition() + .context("Retrieving DirectComposition functions") + .log_with_level(log::Level::Warn) + { + let interval = get_dwm_interval_from_direct_composition(get_frame_id, get_statistics) + .context("Failed to get DWM interval from DirectComposition") + .log_err() + .unwrap_or(DEFAULT_VSYNC_INTERVAL); + log::info!( + "DirectComposition is supported for VSync, interval: {:?}", + interval + ); + let f = Box::new(move || unsafe { + wait_for_comp_clock(0, std::ptr::null(), INFINITE) == 0 + }); + Self { interval, f } + } else { + let interval = get_dwm_interval() + .context("Failed to get DWM interval") + .log_err() + .unwrap_or(DEFAULT_VSYNC_INTERVAL); + log::info!( + "DirectComposition is not supported for VSync, falling back to DWM, interval: {:?}", + interval + ); + let f = Box::new(|| unsafe { DwmFlush().is_ok() }); + Self { interval, f } + } + } + + pub(crate) fn wait_for_vsync(&self) { + let vsync_start = Instant::now(); + let wait_succeeded = (self.f)(); + let elapsed = vsync_start.elapsed(); + // DwmFlush and DCompositionWaitForCompositorClock returns very early + // instead of waiting until vblank when the monitor goes to sleep or is + // unplugged (nothing to present due to desktop occlusion). We use 1ms as + // a threshhold for the duration of the wait functions and fallback to + // Sleep() if it returns before that. This could happen during normal + // operation for the first call after the vsync thread becomes non-idle, + // but it shouldn't happen often. + if !wait_succeeded || elapsed < VSYNC_INTERVAL_THRESHOLD { + log::warn!("VSyncProvider::wait_for_vsync() took shorter than expected"); + std::thread::sleep(self.interval); + } + } +} + +fn initialize_direct_composition() -> Result<( + DCompositionGetFrameId, + DCompositionGetStatistics, + DCompositionWaitForCompositorClock, +)> { + unsafe { + // Load DLL at runtime since older Windows versions don't have dcomp. + let hmodule = GetModuleHandleA(s!("dcomp.dll")).context("Loading dcomp.dll")?; + let get_frame_id_addr = GetProcAddress(hmodule, s!("DCompositionGetFrameId")) + .context("Function DCompositionGetFrameId not found")?; + let get_statistics_addr = GetProcAddress(hmodule, s!("DCompositionGetStatistics")) + .context("Function DCompositionGetStatistics not found")?; + let wait_for_compositor_clock_addr = + GetProcAddress(hmodule, s!("DCompositionWaitForCompositorClock")) + .context("Function DCompositionWaitForCompositorClock not found")?; + let get_frame_id: DCompositionGetFrameId = std::mem::transmute(get_frame_id_addr); + let get_statistics: DCompositionGetStatistics = std::mem::transmute(get_statistics_addr); + let wait_for_compositor_clock: DCompositionWaitForCompositorClock = + std::mem::transmute(wait_for_compositor_clock_addr); + Ok((get_frame_id, get_statistics, wait_for_compositor_clock)) + } +} + +fn get_dwm_interval_from_direct_composition( + get_frame_id: DCompositionGetFrameId, + get_statistics: DCompositionGetStatistics, +) -> Result { + let mut frame_id = 0; + unsafe { get_frame_id(COMPOSITION_FRAME_ID_COMPLETED, &mut frame_id) }.ok()?; + let mut stats = COMPOSITION_FRAME_STATS::default(); + unsafe { + get_statistics( + frame_id, + &mut stats, + 0, + std::ptr::null_mut(), + std::ptr::null_mut(), + ) + } + .ok()?; + Ok(retrieve_duration(stats.framePeriod, *QPC_TICKS_PER_SECOND)) +} + +fn get_dwm_interval() -> Result { + let mut timing_info = DWM_TIMING_INFO { + cbSize: std::mem::size_of::() as u32, + ..Default::default() + }; + unsafe { DwmGetCompositionTimingInfo(HWND::default(), &mut timing_info) }?; + let interval = retrieve_duration(timing_info.qpcRefreshPeriod, *QPC_TICKS_PER_SECOND); + // Check for interval values that are impossibly low. A 29 microsecond + // interval was seen (from a qpcRefreshPeriod of 60). + if interval < VSYNC_INTERVAL_THRESHOLD { + Ok(retrieve_duration( + timing_info.rateRefresh.uiDenominator as u64, + timing_info.rateRefresh.uiNumerator as u64, + )) + } else { + Ok(interval) + } +} + +#[inline] +fn retrieve_duration(counts: u64, ticks_per_second: u64) -> Duration { + let ticks_per_microsecond = ticks_per_second / 1_000_000; + Duration::from_micros(counts / ticks_per_microsecond) +} diff --git a/crates/gpui/src/platform/windows/wrapper.rs b/crates/gpui/src/platform/windows/wrapper.rs index a1fe98a392..60bbc433ca 100644 --- a/crates/gpui/src/platform/windows/wrapper.rs +++ b/crates/gpui/src/platform/windows/wrapper.rs @@ -1,6 +1,6 @@ use std::ops::Deref; -use windows::Win32::UI::WindowsAndMessaging::HCURSOR; +use windows::Win32::{Foundation::HWND, UI::WindowsAndMessaging::HCURSOR}; #[derive(Debug, Clone, Copy)] pub(crate) struct SafeCursor { @@ -23,3 +23,31 @@ impl Deref for SafeCursor { &self.raw } } + +#[derive(Debug, Clone, Copy)] +pub(crate) struct SafeHwnd { + raw: HWND, +} + +impl SafeHwnd { + pub(crate) fn as_raw(&self) -> HWND { + self.raw + } +} + +unsafe impl Send for SafeHwnd {} +unsafe impl Sync for SafeHwnd {} + +impl From for SafeHwnd { + fn from(value: HWND) -> Self { + SafeHwnd { raw: value } + } +} + +impl Deref for SafeHwnd { + type Target = HWND; + + fn deref(&self) -> &Self::Target { + &self.raw + } +}