From 5b61b8c8ed6469c3b12aed6d396561d2c4aee17b Mon Sep 17 00:00:00 2001 From: Cole Miller Date: Fri, 11 Jul 2025 21:01:09 -0400 Subject: [PATCH] agent: Fix crash with pathological fetch output (#34253) Closes #34029 The crash is due to a stack overflow in our `html_to_markdown` conversion; I've added a maximum depth of 200 for the recursion in that crate to guard against this kind of thing. Separately, we were treating all content-types other than `text/plain` and `application/json` as HTML; I've changed this to only treat `text/html` and `application/xhtml+xml` as HTML, and fall back to plaintext. (In the original crash, the content-type was `application/octet-stream`.) Release Notes: - agent: Fixed a potential crash when fetching large non-HTML files. --- crates/assistant_tools/src/fetch_tool.rs | 5 ++--- crates/html_to_markdown/src/markdown_writer.rs | 6 ++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/assistant_tools/src/fetch_tool.rs b/crates/assistant_tools/src/fetch_tool.rs index c8fa600e83..54d49359ba 100644 --- a/crates/assistant_tools/src/fetch_tool.rs +++ b/crates/assistant_tools/src/fetch_tool.rs @@ -69,10 +69,9 @@ impl FetchTool { .to_str() .context("invalid Content-Type header")?; let content_type = match content_type { - "text/html" => ContentType::Html, - "text/plain" => ContentType::Plaintext, + "text/html" | "application/xhtml+xml" => ContentType::Html, "application/json" => ContentType::Json, - _ => ContentType::Html, + _ => ContentType::Plaintext, }; match content_type { diff --git a/crates/html_to_markdown/src/markdown_writer.rs b/crates/html_to_markdown/src/markdown_writer.rs index a9caf7afa7..c32205ae7b 100644 --- a/crates/html_to_markdown/src/markdown_writer.rs +++ b/crates/html_to_markdown/src/markdown_writer.rs @@ -119,8 +119,10 @@ impl MarkdownWriter { .push_back(current_element.clone()); } - for child in node.children.borrow().iter() { - self.visit_node(child, handlers)?; + if self.current_element_stack.len() < 200 { + for child in node.children.borrow().iter() { + self.visit_node(child, handlers)?; + } } if let Some(current_element) = current_element {