gpui: Provide workaround for AMD Linux driver bug (#26890)

There is apparently an amdgpu/radv bug where rendering with
multisample anti-aliasing (MSAA) crashes when the bounds
of a triangle list exceed 1024px, which in Zed happens with the default
buffer font size when you select a line with more than 144 characters.

This crash has been reported as #26143.

This commit introduces a workaround: you can set the
ZED_PATH_SAMPLE_COUNT=0
environment variable to disable MSAA. In addition, the error message we print
when a GPU crash is encountered with radv now suggests trying this
environment
variable as a workaround and links to the respective issue.

Side note: MSAA was introduced in
f08b1d78ec,
so versions earlier than 0.173.8 are not affected by this driver bug.

Release Notes:

- Added a workaround for an AMD Linux driver bug that causes Zed to
crash when selecting long lines.
This commit is contained in:
Martin Fischer 2025-03-18 21:11:09 +01:00 committed by GitHub
parent 48fe134408
commit 4a39fc2644
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 24 additions and 6 deletions

View file

@ -18,7 +18,7 @@ use std::{mem, sync::Arc};
const MAX_FRAME_TIME_MS: u32 = 10000;
// Use 4x MSAA, all devices support it.
// https://developer.apple.com/documentation/metal/mtldevice/1433355-supportstexturesamplecount
const PATH_SAMPLE_COUNT: u32 = 4;
const DEFAULT_PATH_SAMPLE_COUNT: u32 = 4;
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
@ -130,7 +130,7 @@ struct BladePipelines {
}
impl BladePipelines {
fn new(gpu: &gpu::Context, surface_info: gpu::SurfaceInfo) -> Self {
fn new(gpu: &gpu::Context, surface_info: gpu::SurfaceInfo, path_sample_count: u32) -> Self {
use gpu::ShaderData as _;
log::info!(
@ -211,7 +211,7 @@ impl BladePipelines {
write_mask: gpu::ColorWrites::default(),
}],
multisample_state: gpu::MultisampleState {
sample_count: PATH_SAMPLE_COUNT,
sample_count: path_sample_count,
..Default::default()
},
}),
@ -322,6 +322,7 @@ pub struct BladeRenderer {
atlas_sampler: gpu::Sampler,
#[cfg(target_os = "macos")]
core_video_texture_cache: CVMetalTextureCache,
path_sample_count: u32,
}
impl BladeRenderer {
@ -347,13 +348,18 @@ impl BladeRenderer {
name: "main",
buffer_count: 2,
});
let pipelines = BladePipelines::new(&context.gpu, surface.info());
// workaround for https://github.com/zed-industries/zed/issues/26143
let path_sample_count = std::env::var("ZED_PATH_SAMPLE_COUNT")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(DEFAULT_PATH_SAMPLE_COUNT);
let pipelines = BladePipelines::new(&context.gpu, surface.info(), path_sample_count);
let instance_belt = BufferBelt::new(BufferBeltDescriptor {
memory: gpu::Memory::Shared,
min_chunk_size: 0x1000,
alignment: 0x40, // Vulkan `minStorageBufferOffsetAlignment` on Intel Xe
});
let atlas = Arc::new(BladeAtlas::new(&context.gpu, PATH_SAMPLE_COUNT));
let atlas = Arc::new(BladeAtlas::new(&context.gpu, path_sample_count));
let atlas_sampler = context.gpu.create_sampler(gpu::SamplerDesc {
name: "atlas",
mag_filter: gpu::FilterMode::Linear,
@ -382,6 +388,7 @@ impl BladeRenderer {
atlas_sampler,
#[cfg(target_os = "macos")]
core_video_texture_cache,
path_sample_count,
})
}
@ -389,6 +396,15 @@ impl BladeRenderer {
if let Some(last_sp) = self.last_sync_point.take() {
if !self.gpu.wait_for(&last_sp, MAX_FRAME_TIME_MS) {
log::error!("GPU hung");
#[cfg(target_os = "linux")]
if self.gpu.device_information().driver_name == "radv" {
log::error!("there's a known bug with amdgpu/radv, try setting ZED_PATH_SAMPLE_COUNT=0 as a workaround");
log::error!("if that helps you're running into https://github.com/zed-industries/zed/issues/26143");
}
log::error!(
"your device information is: {:?}",
self.gpu.device_information()
);
while !self.gpu.wait_for(&last_sp, MAX_FRAME_TIME_MS) {}
}
}
@ -428,7 +444,8 @@ impl BladeRenderer {
self.gpu
.reconfigure_surface(&mut self.surface, self.surface_config);
self.pipelines.destroy(&self.gpu);
self.pipelines = BladePipelines::new(&self.gpu, self.surface.info());
self.pipelines =
BladePipelines::new(&self.gpu, self.surface.info(), self.path_sample_count);
}
}

View file

@ -111,6 +111,7 @@ On some systems the file `/etc/prime-discrete` can be used to enforce the use of
On others, you may be able to use the environment variable `DRI_PRIME=1` when running Zed to force the use of the discrete GPU.
If you're using an AMD GPU and Zed crashes when selecting long lines, try setting the `ZED_PATH_SAMPLE_COUNT=0` environment variable. (See [#26143](https://github.com/zed-industries/zed/issues/26143))
If you're using an AMD GPU, you might get a 'Broken Pipe' error. Try using the RADV or Mesa drivers. (See [#13880](https://github.com/zed-industries/zed/issues/13880))
If you are using Mesa and want more control over which GPU is selected, you can run `MESA_VK_DEVICE_SELECT=list zed --foreground` to get a list of available GPUs and then export `MESA_VK_DEVICE_SELECT=xxxx:yyyy` to choose a specific device.