ZIm/crates/language_model/src/request.rs
Marshall Bowers 7abe2c9c31
agent: Attach thread ID and prompt ID to telemetry events (#29069)
This PR attaches the thread ID and the new prompt ID to telemetry events
for completions in the Agent panel.

Release Notes:

- N/A

---------

Co-authored-by: Mikayla Maki <mikayla.c.maki@gmail.com>
2025-04-18 20:41:02 +00:00

253 lines
8.4 KiB
Rust

use std::io::{Cursor, Write};
use std::sync::Arc;
use crate::role::Role;
use crate::{LanguageModelToolUse, LanguageModelToolUseId};
use base64::write::EncoderWriter;
use gpui::{
App, AppContext as _, DevicePixels, Image, ObjectFit, RenderImage, SharedString, Size, Task,
point, px, size,
};
use image::{DynamicImage, ImageDecoder, codecs::png::PngEncoder, imageops::resize};
use serde::{Deserialize, Serialize};
use util::ResultExt;
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct LanguageModelImage {
/// A base64-encoded PNG image.
pub source: SharedString,
size: Size<DevicePixels>,
}
impl std::fmt::Debug for LanguageModelImage {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LanguageModelImage")
.field("source", &format!("<{} bytes>", self.source.len()))
.field("size", &self.size)
.finish()
}
}
/// Anthropic wants uploaded images to be smaller than this in both dimensions.
const ANTHROPIC_SIZE_LIMT: f32 = 1568.;
impl LanguageModelImage {
pub fn from_image(data: Image, cx: &mut App) -> Task<Option<Self>> {
cx.background_spawn(async move {
match data.format() {
gpui::ImageFormat::Png
| gpui::ImageFormat::Jpeg
| gpui::ImageFormat::Webp
| gpui::ImageFormat::Gif => {}
_ => return None,
};
let image = image::codecs::png::PngDecoder::new(Cursor::new(data.bytes())).log_err()?;
let (width, height) = image.dimensions();
let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));
let mut base64_image = Vec::new();
{
let mut base64_encoder = EncoderWriter::new(
Cursor::new(&mut base64_image),
&base64::engine::general_purpose::STANDARD,
);
if image_size.width.0 > ANTHROPIC_SIZE_LIMT as i32
|| image_size.height.0 > ANTHROPIC_SIZE_LIMT as i32
{
let new_bounds = ObjectFit::ScaleDown.get_bounds(
gpui::Bounds {
origin: point(px(0.0), px(0.0)),
size: size(px(ANTHROPIC_SIZE_LIMT), px(ANTHROPIC_SIZE_LIMT)),
},
image_size,
);
let image = DynamicImage::from_decoder(image).log_err()?.resize(
new_bounds.size.width.0 as u32,
new_bounds.size.height.0 as u32,
image::imageops::FilterType::Triangle,
);
let mut png = Vec::new();
image
.write_with_encoder(PngEncoder::new(&mut png))
.log_err()?;
base64_encoder.write_all(png.as_slice()).log_err()?;
} else {
base64_encoder.write_all(data.bytes()).log_err()?;
}
}
// SAFETY: The base64 encoder should not produce non-UTF8.
let source = unsafe { String::from_utf8_unchecked(base64_image) };
Some(LanguageModelImage {
size: image_size,
source: source.into(),
})
})
}
/// Resolves image into an LLM-ready format (base64).
pub fn from_render_image(data: &RenderImage) -> Option<Self> {
let image_size = data.size(0);
let mut bytes = data.as_bytes(0).unwrap_or(&[]).to_vec();
// Convert from BGRA to RGBA.
for pixel in bytes.chunks_exact_mut(4) {
pixel.swap(2, 0);
}
let mut image = image::RgbaImage::from_vec(
image_size.width.0 as u32,
image_size.height.0 as u32,
bytes,
)
.expect("We already know this works");
// https://docs.anthropic.com/en/docs/build-with-claude/vision
if image_size.width.0 > ANTHROPIC_SIZE_LIMT as i32
|| image_size.height.0 > ANTHROPIC_SIZE_LIMT as i32
{
let new_bounds = ObjectFit::ScaleDown.get_bounds(
gpui::Bounds {
origin: point(px(0.0), px(0.0)),
size: size(px(ANTHROPIC_SIZE_LIMT), px(ANTHROPIC_SIZE_LIMT)),
},
image_size,
);
image = resize(
&image,
new_bounds.size.width.0 as u32,
new_bounds.size.height.0 as u32,
image::imageops::FilterType::Triangle,
);
}
let mut png = Vec::new();
image
.write_with_encoder(PngEncoder::new(&mut png))
.log_err()?;
let mut base64_image = Vec::new();
{
let mut base64_encoder = EncoderWriter::new(
Cursor::new(&mut base64_image),
&base64::engine::general_purpose::STANDARD,
);
base64_encoder.write_all(png.as_slice()).log_err()?;
}
// SAFETY: The base64 encoder should not produce non-UTF8.
let source = unsafe { String::from_utf8_unchecked(base64_image) };
Some(LanguageModelImage {
size: image_size,
source: source.into(),
})
}
pub fn estimate_tokens(&self) -> usize {
let width = self.size.width.0.unsigned_abs() as usize;
let height = self.size.height.0.unsigned_abs() as usize;
// From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
// Note that are a lot of conditions on Anthropic's API, and OpenAI doesn't use this,
// so this method is more of a rough guess.
(width * height) / 750
}
}
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct LanguageModelToolResult {
pub tool_use_id: LanguageModelToolUseId,
pub tool_name: Arc<str>,
pub is_error: bool,
pub content: Arc<str>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum MessageContent {
Text(String),
Image(LanguageModelImage),
ToolUse(LanguageModelToolUse),
ToolResult(LanguageModelToolResult),
}
impl From<String> for MessageContent {
fn from(value: String) -> Self {
MessageContent::Text(value)
}
}
impl From<&str> for MessageContent {
fn from(value: &str) -> Self {
MessageContent::Text(value.to_string())
}
}
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)]
pub struct LanguageModelRequestMessage {
pub role: Role,
pub content: Vec<MessageContent>,
pub cache: bool,
}
impl LanguageModelRequestMessage {
pub fn string_contents(&self) -> String {
let mut buffer = String::new();
for string in self.content.iter().filter_map(|content| match content {
MessageContent::Text(text) => Some(text.as_str()),
MessageContent::ToolResult(tool_result) => Some(tool_result.content.as_ref()),
MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
}) {
buffer.push_str(string);
}
buffer
}
pub fn contents_empty(&self) -> bool {
self.content.is_empty()
|| self
.content
.first()
.map(|content| match content {
MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
MessageContent::ToolResult(tool_result) => {
tool_result.content.chars().all(|c| c.is_whitespace())
}
MessageContent::ToolUse(_) | MessageContent::Image(_) => true,
})
.unwrap_or(false)
}
}
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelRequestTool {
pub name: String,
pub description: String,
pub input_schema: serde_json::Value,
}
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct LanguageModelRequest {
pub thread_id: Option<String>,
pub prompt_id: Option<String>,
pub messages: Vec<LanguageModelRequestMessage>,
pub tools: Vec<LanguageModelRequestTool>,
pub stop: Vec<String>,
pub temperature: Option<f32>,
}
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelResponseMessage {
pub role: Option<Role>,
pub content: Option<String>,
}