agent2: Port more tools (#35987)
Release Notes: - N/A --------- Co-authored-by: Ben Brandt <benjamin.j.brandt@gmail.com> Co-authored-by: Antonio Scandurra <me@as-cii.com>
This commit is contained in:
parent
365b5aa31d
commit
bb6ea22944
8 changed files with 352 additions and 24 deletions
161
crates/agent2/src/tools/fetch_tool.rs
Normal file
161
crates/agent2/src/tools/fetch_tool.rs
Normal file
|
@ -0,0 +1,161 @@
|
|||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
use std::{borrow::Cow, cell::RefCell};
|
||||
|
||||
use agent_client_protocol as acp;
|
||||
use anyhow::{Context as _, Result, bail};
|
||||
use futures::AsyncReadExt as _;
|
||||
use gpui::{App, AppContext as _, Task};
|
||||
use html_to_markdown::{TagHandler, convert_html_to_markdown, markdown};
|
||||
use http_client::{AsyncBody, HttpClientWithUrl};
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use ui::SharedString;
|
||||
use util::markdown::MarkdownEscaped;
|
||||
|
||||
use crate::{AgentTool, ToolCallEventStream};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
|
||||
enum ContentType {
|
||||
Html,
|
||||
Plaintext,
|
||||
Json,
|
||||
}
|
||||
|
||||
/// Fetches a URL and returns the content as Markdown.
|
||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct FetchToolInput {
|
||||
/// The URL to fetch.
|
||||
url: String,
|
||||
}
|
||||
|
||||
pub struct FetchTool {
|
||||
http_client: Arc<HttpClientWithUrl>,
|
||||
}
|
||||
|
||||
impl FetchTool {
|
||||
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
|
||||
Self { http_client }
|
||||
}
|
||||
|
||||
async fn build_message(http_client: Arc<HttpClientWithUrl>, url: &str) -> Result<String> {
|
||||
let url = if !url.starts_with("https://") && !url.starts_with("http://") {
|
||||
Cow::Owned(format!("https://{url}"))
|
||||
} else {
|
||||
Cow::Borrowed(url)
|
||||
};
|
||||
|
||||
let mut response = http_client.get(&url, AsyncBody::default(), true).await?;
|
||||
|
||||
let mut body = Vec::new();
|
||||
response
|
||||
.body_mut()
|
||||
.read_to_end(&mut body)
|
||||
.await
|
||||
.context("error reading response body")?;
|
||||
|
||||
if response.status().is_client_error() {
|
||||
let text = String::from_utf8_lossy(body.as_slice());
|
||||
bail!(
|
||||
"status error {}, response: {text:?}",
|
||||
response.status().as_u16()
|
||||
);
|
||||
}
|
||||
|
||||
let Some(content_type) = response.headers().get("content-type") else {
|
||||
bail!("missing Content-Type header");
|
||||
};
|
||||
let content_type = content_type
|
||||
.to_str()
|
||||
.context("invalid Content-Type header")?;
|
||||
|
||||
let content_type = if content_type.starts_with("text/plain") {
|
||||
ContentType::Plaintext
|
||||
} else if content_type.starts_with("application/json") {
|
||||
ContentType::Json
|
||||
} else {
|
||||
ContentType::Html
|
||||
};
|
||||
|
||||
match content_type {
|
||||
ContentType::Html => {
|
||||
let mut handlers: Vec<TagHandler> = vec![
|
||||
Rc::new(RefCell::new(markdown::WebpageChromeRemover)),
|
||||
Rc::new(RefCell::new(markdown::ParagraphHandler)),
|
||||
Rc::new(RefCell::new(markdown::HeadingHandler)),
|
||||
Rc::new(RefCell::new(markdown::ListHandler)),
|
||||
Rc::new(RefCell::new(markdown::TableHandler::new())),
|
||||
Rc::new(RefCell::new(markdown::StyledTextHandler)),
|
||||
];
|
||||
if url.contains("wikipedia.org") {
|
||||
use html_to_markdown::structure::wikipedia;
|
||||
|
||||
handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaChromeRemover)));
|
||||
handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaInfoboxHandler)));
|
||||
handlers.push(Rc::new(
|
||||
RefCell::new(wikipedia::WikipediaCodeHandler::new()),
|
||||
));
|
||||
} else {
|
||||
handlers.push(Rc::new(RefCell::new(markdown::CodeHandler)));
|
||||
}
|
||||
|
||||
convert_html_to_markdown(&body[..], &mut handlers)
|
||||
}
|
||||
ContentType::Plaintext => Ok(std::str::from_utf8(&body)?.to_owned()),
|
||||
ContentType::Json => {
|
||||
let json: serde_json::Value = serde_json::from_slice(&body)?;
|
||||
|
||||
Ok(format!(
|
||||
"```json\n{}\n```",
|
||||
serde_json::to_string_pretty(&json)?
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AgentTool for FetchTool {
|
||||
type Input = FetchToolInput;
|
||||
type Output = String;
|
||||
|
||||
fn name(&self) -> SharedString {
|
||||
"fetch".into()
|
||||
}
|
||||
|
||||
fn kind(&self) -> acp::ToolKind {
|
||||
acp::ToolKind::Fetch
|
||||
}
|
||||
|
||||
fn initial_title(&self, input: Result<Self::Input, serde_json::Value>) -> SharedString {
|
||||
match input {
|
||||
Ok(input) => format!("Fetch {}", MarkdownEscaped(&input.url)).into(),
|
||||
Err(_) => "Fetch URL".into(),
|
||||
}
|
||||
}
|
||||
|
||||
fn run(
|
||||
self: Arc<Self>,
|
||||
input: Self::Input,
|
||||
event_stream: ToolCallEventStream,
|
||||
cx: &mut App,
|
||||
) -> Task<Result<Self::Output>> {
|
||||
let text = cx.background_spawn({
|
||||
let http_client = self.http_client.clone();
|
||||
async move { Self::build_message(http_client, &input.url).await }
|
||||
});
|
||||
|
||||
cx.foreground_executor().spawn(async move {
|
||||
let text = text.await?;
|
||||
if text.trim().is_empty() {
|
||||
bail!("no textual content found");
|
||||
}
|
||||
|
||||
event_stream.update_fields(acp::ToolCallUpdateFields {
|
||||
content: Some(vec![text.clone().into()]),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
Ok(text)
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue