agent: Fix crash with pathological fetch output (#34253)

Closes #34029

The crash is due to a stack overflow in our `html_to_markdown`
conversion; I've added a maximum depth of 200 for the recursion in that
crate to guard against deeply nested input.

Separately, we were treating all content-types other than `text/plain`
and `application/json` as HTML; I've changed this to treat only
`text/html` and `application/xhtml+xml` as HTML, and to fall back to
plaintext for any other content-type that isn't `application/json`.
(In the original crash, the content-type was `application/octet-stream`.)

Release Notes:

- agent: Fixed a potential crash when fetching large non-HTML files.
This commit is contained in:
Cole Miller 2025-07-11 21:01:09 -04:00 committed by GitHub
parent 625ce12a3e
commit 5b61b8c8ed
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 6 additions and 5 deletions

View file

@ -69,10 +69,9 @@ impl FetchTool {
.to_str()
.context("invalid Content-Type header")?;
let content_type = match content_type {
"text/html" => ContentType::Html,
"text/plain" => ContentType::Plaintext,
"text/html" | "application/xhtml+xml" => ContentType::Html,
"application/json" => ContentType::Json,
_ => ContentType::Html,
_ => ContentType::Plaintext,
};
match content_type {

View file

@ -119,8 +119,10 @@ impl MarkdownWriter {
.push_back(current_element.clone());
}
for child in node.children.borrow().iter() {
self.visit_node(child, handlers)?;
if self.current_element_stack.len() < 200 {
for child in node.children.borrow().iter() {
self.visit_node(child, handlers)?;
}
}
if let Some(current_element) = current_element {