debugger: Add memory view (#33955)

This is mostly setting up the UI for now; I expect it to be the biggest
chunk of work.

Release Notes:

- debugger: Added memory view

---------

Co-authored-by: Anthony Eid <hello@anthonyeid.me>
Co-authored-by: Mikayla Maki <mikayla.c.maki@gmail.com>
Co-authored-by: Mikayla Maki <mikayla@zed.dev>
This commit is contained in:
Piotr Osiewicz 2025-07-14 16:32:06 +02:00 committed by GitHub
parent a2f5c47e2d
commit 6673c7cd4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1732 additions and 71 deletions

View file

@ -31,6 +31,7 @@ aho-corasick.workspace = true
anyhow.workspace = true
askpass.workspace = true
async-trait.workspace = true
base64.workspace = true
buffer_diff.workspace = true
circular-buffer.workspace = true
client.workspace = true
@ -72,6 +73,7 @@ settings.workspace = true
sha2.workspace = true
shellexpand.workspace = true
shlex.workspace = true
smallvec.workspace = true
smol.workspace = true
snippet.workspace = true
snippet_provider.workspace = true

View file

@ -15,7 +15,9 @@ pub mod breakpoint_store;
pub mod dap_command;
pub mod dap_store;
pub mod locators;
mod memory;
pub mod session;
#[cfg(any(feature = "test-support", test))]
pub mod test;
pub use memory::MemoryCell;

View file

@ -1,6 +1,7 @@
use std::sync::Arc;
use anyhow::{Context as _, Ok, Result};
use base64::Engine;
use dap::{
Capabilities, ContinueArguments, ExceptionFilterOptions, InitializeRequestArguments,
InitializeRequestArgumentsPathFormat, NextArguments, SetVariableResponse, SourceBreakpoint,
@ -1774,3 +1775,95 @@ impl DapCommand for LocationsCommand {
})
}
}
/// Parameters of a DAP `readMemory` request. The fields are forwarded one-to-one
/// into `dap::ReadMemoryArguments` by this type's `to_dap`.
#[derive(Debug, Hash, PartialEq, Eq)]
pub(crate) struct ReadMemory {
/// Memory reference handed to the adapter as-is.
pub(crate) memory_reference: String,
/// Optional offset forwarded alongside `memory_reference`.
pub(crate) offset: Option<u64>,
/// Number of bytes requested.
pub(crate) count: u64,
}
/// Decoded form of the adapter's answer to a [`ReadMemory`] request.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct ReadMemoryResponse {
/// Address reported by the adapter, copied from the response unchanged.
pub(super) address: Arc<str>,
/// The adapter's `unreadable_bytes` value, if it reported one.
pub(super) unreadable_bytes: Option<u64>,
/// Base64-decoded payload; empty when the response carried no data.
pub(super) content: Arc<[u8]>,
}
impl LocalDapCommand for ReadMemory {
    type Response = ReadMemoryResponse;
    type DapRequest = dap::requests::ReadMemory;
    const CACHEABLE: bool = true;

    /// Whether the adapter advertised support for reading memory; a missing
    /// capability is treated as "not supported".
    fn is_supported(capabilities: &Capabilities) -> bool {
        let advertised = capabilities.supports_read_memory_request;
        advertised.unwrap_or_default()
    }

    /// Copies this command's fields into the DAP argument struct.
    fn to_dap(&self) -> <Self::DapRequest as dap::requests::Request>::Arguments {
        let Self {
            memory_reference,
            offset,
            count,
        } = self;
        dap::ReadMemoryArguments {
            memory_reference: memory_reference.clone(),
            offset: *offset,
            count: *count,
        }
    }

    /// Decodes the base64 payload of the response (if any) into raw bytes.
    ///
    /// # Errors
    /// Fails when the payload is not valid base64. The decoding error itself is
    /// logged here; the returned error only carries the context message.
    fn response_from_dap(
        &self,
        message: <Self::DapRequest as dap::requests::Request>::Response,
    ) -> Result<Self::Response> {
        let content = match message.data {
            Some(encoded) => base64::engine::general_purpose::STANDARD
                .decode(encoded)
                .log_err()
                .context("parsing base64 data from DAP's ReadMemory response")?,
            // No payload at all: treat it as zero readable bytes.
            None => Vec::new(),
        };
        Ok(ReadMemoryResponse {
            address: message.address.into(),
            unreadable_bytes: message.unreadable_bytes,
            content: content.into(),
        })
    }
}
// The DAP argument/response types are used directly as the command and its
// response, so both conversions below are pass-throughs.
impl LocalDapCommand for dap::DataBreakpointInfoArguments {
type Response = dap::DataBreakpointInfoResponse;
type DapRequest = dap::requests::DataBreakpointInfo;
const CACHEABLE: bool = true;
/// Supported only when the adapter advertised `supports_data_breakpoints`;
/// a missing capability counts as unsupported.
fn is_supported(capabilities: &Capabilities) -> bool {
capabilities.supports_data_breakpoints.unwrap_or_default()
}
/// The command already is the DAP argument struct; hand out a copy.
fn to_dap(&self) -> <Self::DapRequest as dap::requests::Request>::Arguments {
self.clone()
}
/// Returns the adapter's response unchanged.
fn response_from_dap(
&self,
message: <Self::DapRequest as dap::requests::Request>::Response,
) -> Result<Self::Response> {
Ok(message)
}
}
// The DAP argument/response types are used directly as the command and its
// response. Unlike the read command, `CACHEABLE` is not overridden here, so
// whatever default the trait declares applies.
impl LocalDapCommand for dap::WriteMemoryArguments {
type Response = dap::WriteMemoryResponse;
type DapRequest = dap::requests::WriteMemory;
/// Supported only when the adapter advertised `supports_write_memory_request`;
/// a missing capability counts as unsupported.
fn is_supported(capabilities: &Capabilities) -> bool {
capabilities
.supports_write_memory_request
.unwrap_or_default()
}
/// The command already is the DAP argument struct; hand out a copy.
fn to_dap(&self) -> <Self::DapRequest as dap::requests::Request>::Arguments {
self.clone()
}
/// Returns the adapter's response unchanged.
fn response_from_dap(
&self,
message: <Self::DapRequest as dap::requests::Request>::Response,
) -> Result<Self::Response> {
Ok(message)
}
}

View file

@ -0,0 +1,384 @@
//! This module defines the format in which memory of debuggee is represented.
//!
//! Each byte in memory can either be mapped or unmapped. We try to mimic that twofold:
//! - We assume that the memory is divided into pages of a fixed size.
//! - We assume that each page can be either mapped or unmapped.
//! These two assumptions drive the shape of the memory representation.
//! In particular, we want the unmapped pages to be represented without allocating any memory, as *most*
//! of the memory in a program space is usually unmapped.
//! Note that per DAP we don't know what the address space layout is, so we can't optimize off of it.
//! Note that while we optimize for a paged layout, we also want to be able to represent memory that is not paged.
//! This use case is relevant to embedded folks. Furthermore, we cater to default 4k page size.
//! It is picked arbitrarily as a ubiquitous default - other than that, the underlying format of Zed's memory storage should not be relevant
//! to the users of this module.
use std::{collections::BTreeMap, ops::RangeInclusive, sync::Arc};
use gpui::BackgroundExecutor;
use smallvec::SmallVec;
/// Size, in bytes, of the pages this module splits the debuggee's memory into (4 KiB).
const PAGE_SIZE: u64 = 4096;
/// Represents the contents of a single page. We special-case unmapped pages to be allocation-free,
/// since they're going to make up the majority of the memory in a program space (even though the user might not even get to see them - ever).
#[derive(Clone, Debug)]
pub(super) enum PageContents {
/// Whole page is unreadable.
Unmapped,
/// At least part of the page was described by the adapter; the chunks are shared
/// behind an `Arc` so that cloning a page does not copy its bytes.
Mapped(Arc<MappedPageContents>),
}
impl PageContents {
    /// Test-only shorthand: a mapped page made of exactly one readable chunk
    /// holding `contents`.
    #[cfg(test)]
    fn mapped(contents: Vec<u8>) -> Self {
        let only_chunk = PageChunk::Mapped(contents.into());
        let page = MappedPageContents(vec![only_chunk].into());
        Self::Mapped(Arc::new(page))
    }
}
/// A contiguous run of bytes within a page, either readable or not.
#[derive(Clone, Debug)]
enum PageChunk {
/// Readable bytes, stored verbatim.
Mapped(Arc<[u8]>),
/// A run of unreadable bytes; only its length is stored.
Unmapped(u64),
}
impl PageChunk {
fn len(&self) -> u64 {
match self {
PageChunk::Mapped(contents) => contents.len() as u64,
PageChunk::Unmapped(size) => *size,
}
}
}
impl MappedPageContents {
fn len(&self) -> u64 {
self.0.iter().map(|chunk| chunk.len()).sum()
}
}
/// We hope for the whole page to be mapped in a single chunk, but we do leave the possibility open
/// of having interleaved read permissions in a single page; debuggee's execution environment might either
/// have a different page size OR it might not have paged memory layout altogether
/// (which might be relevant to embedded systems).
///
/// As stated previously, the concept of a page in this module has to do more
/// with optimizing fetching of the memory and not with the underlying bits and pieces
/// of the memory of a debuggee.
///
/// Chunks are appended in the order the page is read, so walking them front to back
/// walks the page from its lowest address upwards.
#[derive(Default, Debug)]
pub(super) struct MappedPageContents(
/// Most of the time there should be only one chunk (either mapped or unmapped),
/// but we do leave the possibility open of having multiple regions of memory in a single page.
/// The inline capacity of one keeps that common case free of a separate heap allocation.
SmallVec<[PageChunk; 1]>,
);
/// Address of a single byte in the debuggee's memory.
type MemoryAddress = u64;
/// Address of the first byte of a page. Values produced by
/// `Memory::memory_range_to_page_range` are multiples of `PAGE_SIZE`.
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)]
#[repr(transparent)]
pub(super) struct PageAddress(u64);
impl PageAddress {
pub(super) fn iter_range(
range: RangeInclusive<PageAddress>,
) -> impl Iterator<Item = PageAddress> {
let mut current = range.start().0;
let end = range.end().0;
std::iter::from_fn(move || {
if current > end {
None
} else {
let addr = PageAddress(current);
current += PAGE_SIZE;
Some(addr)
}
})
}
}
/// Store of the debuggee memory pages whose state (mapped or unmapped) is already known.
pub(super) struct Memory {
/// Known pages keyed by their start address; a missing key means the page
/// has not been recorded yet.
pages: BTreeMap<PageAddress, PageContents>,
}
/// Represents a single memory cell (or None if a given cell is unmapped/unknown).
///
/// `Some(byte)` carries the value that was read; `None` is yielded both for bytes the
/// adapter reported as unreadable and for addresses no page has been recorded for.
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Ord, Eq)]
#[repr(transparent)]
pub struct MemoryCell(pub Option<u8>);
impl Memory {
    /// Creates a store with no known pages.
    pub(super) fn new() -> Self {
        Self {
            pages: BTreeMap::new(),
        }
    }

    /// Widens a byte range to the range of pages that contain it: both ends are
    /// rounded down to the start of their page.
    pub(super) fn memory_range_to_page_range(
        range: RangeInclusive<MemoryAddress>,
    ) -> RangeInclusive<PageAddress> {
        let page_containing = |address: MemoryAddress| PageAddress(address - address % PAGE_SIZE);
        let first_page = page_containing(*range.start());
        let last_page = page_containing(*range.end());
        first_page..=last_page
    }

    /// Returns a builder for `page_address`, or `None` when the state of that
    /// page is already recorded and nothing needs to be fetched.
    pub(super) fn build_page(&self, page_address: PageAddress) -> Option<MemoryPageBuilder> {
        let already_known = self.pages.contains_key(&page_address);
        (!already_known).then(|| MemoryPageBuilder::new(page_address))
    }

    /// Records (or replaces) the contents of the page starting at `address`.
    pub(super) fn insert_page(&mut self, address: PageAddress, page: PageContents) {
        self.pages.insert(address, page);
    }

    /// Returns an iterator over the cells in `range`, backed by a snapshot of the
    /// pages known right now; pages recorded later are not reflected in it.
    pub(super) fn memory_range(&self, range: RangeInclusive<MemoryAddress>) -> MemoryIterator {
        let page_span = Self::memory_range_to_page_range(range.clone());
        let mut snapshot = Vec::new();
        for (address, page) in self.pages.range(page_span) {
            // Cloning a page is cheap: mapped contents sit behind an `Arc`.
            snapshot.push((*address, page.clone()));
        }
        MemoryIterator::new(range, snapshot.into_iter())
    }

    /// Forgets every known page. The old map is moved into a task spawned on
    /// `background_executor` and dropped there rather than by the caller.
    pub(crate) fn clear(&mut self, background_executor: &BackgroundExecutor) {
        let stale_pages = std::mem::take(&mut self.pages);
        let drop_task = background_executor.spawn(async move {
            drop(stale_pages);
        });
        drop_task.detach();
    }
}
/// Builder for memory pages.
///
/// Memory reads in DAP are sequential (or at least we make them so).
/// ReadMemory response includes `unreadableBytes` property indicating the number of bytes
/// that could not be read after the last successfully read byte.
///
/// We use it as follows:
/// - We start off with a "large" 1-page ReadMemory request.
/// - If it succeeds/fails wholesale, cool; we have no unknown memory regions in this page.
/// - If it succeeds partially, we know # of mapped bytes.
/// We might also know the # of unmapped bytes.
/// However, we're still unsure about what's *after* the unreadable region.
///
/// This is where this builder comes in. It lets us track the state of figuring out contents of a single page.
pub(super) struct MemoryPageBuilder {
/// Chunks gathered so far, in the order they were reported.
chunks: MappedPageContents,
/// Start address of the page being assembled.
base_address: PageAddress,
/// Number of bytes at the end of the page whose state is still unknown;
/// starts at `PAGE_SIZE` and reaches zero once the page is fully described.
left_to_read: u64,
}
/// Represents a chunk of memory of which we don't know if it's mapped or unmapped; thus we need
/// to issue a request to figure out its state.
pub(super) struct UnknownMemory {
/// Address of the first byte whose state is unknown.
pub(super) address: MemoryAddress,
/// Number of bytes, starting at `address`, whose state is unknown.
pub(super) size: u64,
}
impl MemoryPageBuilder {
    /// Starts assembling the page at `base_address`; nothing is known about it yet.
    fn new(base_address: PageAddress) -> Self {
        Self {
            chunks: Default::default(),
            base_address,
            left_to_read: PAGE_SIZE,
        }
    }

    /// Finishes the page and returns its address together with its contents.
    ///
    /// Must be called only once the whole page has been described (i.e. when
    /// [`Self::next_request`] returns `None`); this is checked in debug builds.
    /// A page that consists solely of unreadable chunks collapses into the
    /// allocation-free [`PageContents::Unmapped`].
    pub(super) fn build(self) -> (PageAddress, PageContents) {
        debug_assert_eq!(self.left_to_read, 0);
        debug_assert_eq!(
            self.chunks.len(),
            PAGE_SIZE,
            "Expected `build` to be called on a fully-fetched page"
        );
        let chunks = self.chunks.0;
        // Look at the chunks themselves here: `MappedPageContents::len` is a byte
        // total, so comparing it against `1` can never identify a one-chunk page.
        let wholly_unmapped = !chunks.is_empty()
            && chunks
                .iter()
                .all(|chunk| matches!(chunk, PageChunk::Unmapped(_)));
        let contents = if wholly_unmapped {
            PageContents::Unmapped
        } else {
            PageContents::Mapped(Arc::new(MappedPageContents(chunks)))
        };
        (self.base_address, contents)
    }

    /// Drives the fetching of memory, in an iterator-esque style.
    ///
    /// Returns the still-unknown tail of the page, or `None` once every byte of
    /// the page has been accounted for.
    pub(super) fn next_request(&self) -> Option<UnknownMemory> {
        if self.left_to_read == 0 {
            return None;
        }
        let offset_in_current_page = PAGE_SIZE - self.left_to_read;
        Some(UnknownMemory {
            address: self.base_address.0 + offset_in_current_page,
            size: self.left_to_read,
        })
    }

    /// Records that the next `bytes` bytes of the page are unreadable.
    ///
    /// The count comes from the debug adapter and may describe an unreadable
    /// region that extends beyond this page, so it is clamped to what is left
    /// of the page.
    pub(super) fn unknown(&mut self, bytes: u64) {
        let bytes = bytes.min(self.left_to_read);
        if bytes == 0 {
            return;
        }
        self.left_to_read -= bytes;
        self.chunks.0.push(PageChunk::Unmapped(bytes));
    }

    /// Records `data` as the next readable bytes of the page.
    ///
    /// Should the adapter return more bytes than the page still has room for,
    /// the surplus is discarded so that the page never exceeds `PAGE_SIZE`.
    pub(super) fn known(&mut self, data: Arc<[u8]>) {
        let room = usize::try_from(self.left_to_read).unwrap_or(usize::MAX);
        let data = if data.len() > room {
            Arc::from(&data[..room])
        } else {
            data
        };
        if data.is_empty() {
            return;
        }
        self.left_to_read -= data.len() as u64;
        self.chunks.0.push(PageChunk::Mapped(data));
    }
}
/// Turns a mapped page into a lazy stream of its cells: readable chunks yield
/// their bytes as `Some`, unreadable chunks yield one `None` per byte.
///
/// The returned iterator keeps the page alive through the `Arc` it owns.
fn page_contents_into_iter(data: Arc<MappedPageContents>) -> Box<dyn Iterator<Item = MemoryCell>> {
    let chunk_count = data.0.len();
    let cells = (0..chunk_count).flat_map(
        move |chunk_index| -> Box<dyn Iterator<Item = MemoryCell>> {
            match &data.0[chunk_index] {
                PageChunk::Unmapped(len) => {
                    Box::new(std::iter::repeat_n(MemoryCell(None), *len as usize))
                }
                PageChunk::Mapped(bytes) => {
                    // The per-chunk iterator holds its own handle to the bytes so that
                    // it does not borrow from the enclosing closure.
                    let bytes = bytes.clone();
                    Box::new(
                        (0..bytes.len()).map(move |byte_index| MemoryCell(Some(bytes[byte_index]))),
                    )
                }
            }
        },
    );
    Box::new(cells)
}
/// Defines an iteration over a range of memory. Some of this memory might be unmapped or straight up missing.
/// Thus, this iterator alternates between synthesizing values and yielding known memory.
pub struct MemoryIterator {
/// Address of the next cell to yield; advanced by one for every yielded cell.
start: MemoryAddress,
/// Address of the last cell to yield (inclusive).
end: MemoryAddress,
/// The mapped page currently being read, paired with the address its cell stream
/// starts at. `None` when no mapped page is being held.
current_known_page: Option<(PageAddress, Box<dyn Iterator<Item = MemoryCell>>)>,
/// Snapshot of known pages that have not been consumed yet.
pages: std::vec::IntoIter<(PageAddress, PageContents)>,
}
impl MemoryIterator {
    /// Creates an iterator over `range` that draws known bytes from `pages`.
    fn new(
        range: RangeInclusive<MemoryAddress>,
        pages: std::vec::IntoIter<(PageAddress, PageContents)>,
    ) -> Self {
        let (start, end) = range.into_inner();
        Self {
            start,
            end,
            current_known_page: None,
            pages,
        }
    }

    /// Pulls the next page out of the snapshot and makes it the current one
    /// (an unmapped page leaves `current_known_page` empty).
    ///
    /// Returns `false` when the snapshot is exhausted, `true` otherwise.
    fn fetch_next_page(&mut self) -> bool {
        let Some((mut address, page)) = self.pages.next() else {
            return false;
        };
        let mut cells = match page {
            PageContents::Mapped(mapped_page_contents) => {
                Some(page_contents_into_iter(mapped_page_contents))
            }
            PageContents::Unmapped => None,
        };
        if address.0 < self.start {
            // The page begins before the requested range (e.g. page at 20, range
            // starting at 25): drop the leading cells so that the stream lines up
            // with `start`.
            let to_skip = self.start - address.0;
            address.0 += to_skip;
            if let Some(cells) = cells.as_mut() {
                cells.nth(to_skip as usize - 1);
            }
        }
        self.current_known_page = cells.map(|cells| (address, cells));
        true
    }
}
impl MemoryIterator {
    /// Moves the cursor one cell forward.
    ///
    /// If the cell just yielded sat at `u64::MAX` there is nothing left to move to;
    /// the range is then flipped into an empty one (`start > end`) instead of
    /// overflowing the cursor.
    fn advance(&mut self) {
        match self.start.checked_add(1) {
            Some(next) => self.start = next,
            None => {
                self.start = 1;
                self.end = 0;
            }
        }
    }
}

impl Iterator for MemoryIterator {
    type Item = MemoryCell;

    /// Yields the cell at the cursor: a known value when the current page covers
    /// it, `MemoryCell(None)` otherwise. Ends once the cursor passes `end`.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.start > self.end {
                return None;
            }
            if let Some((page_address, cells)) = self.current_known_page.as_mut() {
                if page_address.0 > self.start {
                    // The cursor sits in a gap in front of the page we are holding.
                    // Keep that page for later and synthesize an unknown cell; fetching
                    // another page here would replace (and thereby lose) the held one.
                    self.advance();
                    return Some(MemoryCell(None));
                }
                if let Some(cell) = cells.next() {
                    self.advance();
                    return Some(cell);
                }
                // The current page is used up; look for the next one below.
                self.current_known_page = None;
            }
            if !self.fetch_next_page() {
                // No pages left: whatever remains of the range is unknown.
                self.advance();
                return Some(MemoryCell(None));
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::debugger::{
        MemoryCell,
        memory::{MemoryIterator, PageAddress, PageContents},
    };

    /// Without any known page, every cell of the range comes back as unknown.
    #[test]
    fn iterate_over_unmapped_memory() {
        let cells: Vec<_> = MemoryIterator::new(0..=127, Default::default()).collect();
        let all_unknown = vec![MemoryCell(None); 128];
        assert_eq!(cells.len(), all_unknown.len());
        assert_eq!(cells, all_unknown);
    }

    /// A single known byte in the middle of the range is surrounded by unknown cells.
    #[test]
    fn iterate_over_partially_mapped_memory() {
        let pages = vec![(PageAddress(5), PageContents::mapped(vec![1]))];
        let cells: Vec<_> = MemoryIterator::new(0..=127, pages.into_iter()).collect();
        // 5 unknown cells, the known byte at address 5, then 122 unknown cells.
        let mut expected = vec![MemoryCell(None); 128];
        expected[5] = MemoryCell(Some(1));
        assert_eq!(cells.len(), expected.len());
        assert_eq!(cells, expected);
    }

    /// A range that starts inside a page skips the page's leading bytes.
    #[test]
    fn reads_from_the_middle_of_a_page() {
        let page_bytes: Vec<u8> = (0..255).collect();
        let pages = vec![(PageAddress(0), PageContents::mapped(page_bytes))];
        let cells: Vec<_> = MemoryIterator::new(20..=30, pages.into_iter()).collect();
        let expected: Vec<_> = (20..=30).map(|byte| MemoryCell(Some(byte))).collect();
        assert_eq!(cells.len(), expected.len());
        assert_eq!(cells, expected);
    }
}

View file

@ -1,4 +1,6 @@
use crate::debugger::breakpoint_store::BreakpointSessionState;
use crate::debugger::dap_command::ReadMemory;
use crate::debugger::memory::{self, Memory, MemoryIterator, MemoryPageBuilder, PageAddress};
use super::breakpoint_store::{
BreakpointStore, BreakpointStoreEvent, BreakpointUpdatedReason, SourceBreakpoint,
@ -13,6 +15,7 @@ use super::dap_command::{
};
use super::dap_store::DapStore;
use anyhow::{Context as _, Result, anyhow};
use base64::Engine;
use collections::{HashMap, HashSet, IndexMap};
use dap::adapters::{DebugAdapterBinary, DebugAdapterName};
use dap::messages::Response;
@ -26,7 +29,7 @@ use dap::{
use dap::{
ExceptionBreakpointsFilter, ExceptionFilterOptions, OutputEvent, OutputEventCategory,
RunInTerminalRequestArguments, StackFramePresentationHint, StartDebuggingRequestArguments,
StartDebuggingRequestArgumentsRequest, VariablePresentationHint,
StartDebuggingRequestArgumentsRequest, VariablePresentationHint, WriteMemoryArguments,
};
use futures::SinkExt;
use futures::channel::mpsc::UnboundedSender;
@ -42,6 +45,7 @@ use serde_json::Value;
use smol::stream::StreamExt;
use std::any::TypeId;
use std::collections::BTreeMap;
use std::ops::RangeInclusive;
use std::u64;
use std::{
any::Any,
@ -52,7 +56,7 @@ use std::{
};
use task::TaskContext;
use text::{PointUtf16, ToPointUtf16};
use util::ResultExt;
use util::{ResultExt, maybe};
use worktree::Worktree;
#[derive(Debug, Copy, Clone, Hash, PartialEq, PartialOrd, Ord, Eq)]
@ -685,6 +689,7 @@ pub struct Session {
background_tasks: Vec<Task<()>>,
restart_task: Option<Task<()>>,
task_context: TaskContext,
memory: memory::Memory,
quirks: SessionQuirks,
}
@ -855,6 +860,7 @@ impl Session {
label,
adapter,
task_context,
memory: memory::Memory::new(),
quirks,
};
@ -1664,6 +1670,11 @@ impl Session {
self.invalidate_command_type::<ModulesCommand>();
self.invalidate_command_type::<LoadedSourcesCommand>();
self.invalidate_command_type::<ThreadsCommand>();
self.invalidate_command_type::<ReadMemory>();
let executor = self.as_running().map(|running| running.executor.clone());
if let Some(executor) = executor {
self.memory.clear(&executor);
}
}
fn invalidate_state(&mut self, key: &RequestSlot) {
@ -1736,6 +1747,135 @@ impl Session {
&self.modules
}
/// Asks the adapter for the size of whatever `evaluate_name` refers to, by evaluating
/// a `sizeof` expression in REPL context.
///
/// CodeLLDB answers this with the size of the pointed-to memory, which we can use to make
/// the experience of go-to-memory better. Resolves to `None` when the evaluation yields
/// no response or its result does not parse as a number.
pub fn data_access_size(
    &mut self,
    frame_id: Option<u64>,
    evaluate_name: &str,
    cx: &mut Context<Self>,
) -> Task<Option<u64>> {
    let size_query = EvaluateCommand {
        expression: format!("?${{sizeof({evaluate_name})}}"),
        frame_id,
        context: Some(EvaluateArgumentsContext::Repl),
        source: None,
    };
    let evaluation = self.request(size_query, |_, response, _| response.ok(), cx);
    cx.background_spawn(async move {
        let response = evaluation.await?;
        response.result.parse().ok()
    })
}
/// Evaluates `expression` in REPL context and resolves to the memory reference the
/// adapter attached to the result, if any.
///
/// Resolves to `None` when the evaluation yields no response or the response
/// carries no memory reference.
pub fn memory_reference_of_expr(
    &mut self,
    frame_id: Option<u64>,
    expression: String,
    cx: &mut Context<Self>,
) -> Task<Option<String>> {
    let evaluate = EvaluateCommand {
        expression,
        frame_id,
        context: Some(EvaluateArgumentsContext::Repl),
        source: None,
    };
    let evaluation = self.request(evaluate, |_, response, _| response.ok(), cx);
    cx.background_spawn(async move {
        let response = evaluation.await?;
        response.memory_reference
    })
}
/// Sends a request to write `data` into the debuggee's memory at `address`; the
/// request runs detached, so no result is reported back to the caller.
///
/// When the response arrives, the cached memory pages, the cached memory reads and
/// the cached variables are all invalidated and a `SessionEvent::Variables` is emitted.
pub fn write_memory(&mut self, address: u64, data: &[u8], cx: &mut Context<Self>) {
    let arguments = WriteMemoryArguments {
        memory_reference: address.to_string(),
        // DAP transports memory contents base64-encoded.
        data: base64::engine::general_purpose::STANDARD.encode(data),
        allow_partial: None,
        offset: None,
    };
    let write_task = self.request(
        arguments,
        |this, response, cx| {
            this.memory.clear(cx.background_executor());
            this.invalidate_command_type::<ReadMemory>();
            this.invalidate_command_type::<VariablesCommand>();
            cx.emit(SessionEvent::Variables);
            response.ok()
        },
        cx,
    );
    write_task.detach();
}
/// Returns the cells of `range` as far as they are known right now, and kicks off
/// fetching for every page of the range whose state is not recorded yet.
///
/// Fetching is page-based: each unknown page is first requested as a whole. Since
/// parts of a page may turn out to be unreadable, a partially answered request is
/// followed up by requests for the remaining, smaller region until the page is
/// fully described.
pub fn read_memory(
    &mut self,
    range: RangeInclusive<u64>,
    cx: &mut Context<Self>,
) -> MemoryIterator {
    let pages_to_cover = Memory::memory_range_to_page_range(range.clone());
    PageAddress::iter_range(pages_to_cover)
        .for_each(|page_address| self.read_single_page_memory(page_address, cx));
    self.memory.memory_range(range)
}
/// Starts fetching the page at `page_start`, unless its state is already recorded.
fn read_single_page_memory(&mut self, page_start: PageAddress, cx: &mut Context<Self>) {
    if let Some(builder) = self.memory.build_page(page_start) {
        self.memory_read_fetch_page_recursive(builder, cx);
    }
}
/// Fetches whatever part of the builder's page is still unknown.
///
/// Once the builder has no unknown bytes left, the finished page is inserted into the
/// memory store. Otherwise a read request for the remaining tail of the page is issued,
/// and the response is fed into the builder before this function is entered again.
fn memory_read_fetch_page_recursive(
    &mut self,
    mut builder: MemoryPageBuilder,
    cx: &mut Context<Self>,
) {
    let Some(next_request) = builder.next_request() else {
        // We're done fetching. Let's grab the page and insert it into our memory store.
        let (address, contents) = builder.build();
        self.memory.insert_page(address, contents);
        return;
    };
    let size = next_request.size;
    self.fetch(
        ReadMemory {
            memory_reference: format!("0x{:X}", next_request.address),
            offset: Some(0),
            count: next_request.size,
        },
        move |this, memory, cx| {
            if let Ok(memory) = memory {
                let unreadable_bytes = memory.unreadable_bytes.unwrap_or(0);
                let made_progress = !memory.content.is_empty() || unreadable_bytes > 0;
                builder.known(memory.content);
                if made_progress {
                    builder.unknown(unreadable_bytes);
                } else {
                    // The adapter returned neither data nor a count of unreadable bytes.
                    // Without this, the builder would be left unchanged and the very same
                    // request would be issued over and over without the page ever being
                    // completed. Treat the rest of the page as unreadable instead.
                    builder.unknown(size);
                }
                // This is the recursive bit: if we're not yet done with
                // the whole page, we'll kick off a new request with smaller range.
                // Note that this function is recursive only conceptually;
                // since it kicks off a new request with callback, we don't need to worry about stack overflow.
                this.memory_read_fetch_page_recursive(builder, cx);
            } else {
                // NOTE(review): the builder is dropped right after this call, so a failed
                // request does not record the page in the memory store - confirm that
                // leaving the page unrecorded is intended.
                builder.unknown(size);
            }
        },
        cx,
    );
}
/// Returns the current value of this session's `ignore_breakpoints` flag.
pub fn ignore_breakpoints(&self) -> bool {
self.ignore_breakpoints
}
@ -2378,6 +2518,8 @@ impl Session {
move |this, response, cx| {
let response = response.log_err()?;
this.invalidate_command_type::<VariablesCommand>();
this.invalidate_command_type::<ReadMemory>();
this.memory.clear(cx.background_executor());
this.refresh_watchers(stack_frame_id, cx);
cx.emit(SessionEvent::Variables);
Some(response)
@ -2417,6 +2559,8 @@ impl Session {
cx.spawn(async move |this, cx| {
let response = request.await;
this.update(cx, |this, cx| {
this.memory.clear(cx.background_executor());
this.invalidate_command_type::<ReadMemory>();
match response {
Ok(response) => {
let event = dap::OutputEvent {