Add tracing needed for LLM rate limit dashboards (#16388)

Release Notes:

- N/A

---------

Co-authored-by: Marshall <marshall@zed.dev>
2024-08-16 14:52:31 -07:00 · 2024-08-16 14:52:31 -07:00 · 1b1070e0f7
commit 1b1070e0f7
parent 9ef3306f55
6 changed files with 227 additions and 29 deletions
--- a/crates/anthropic/Cargo.toml
+++ b/crates/anthropic/Cargo.toml
@@ -17,6 +17,7 @@ path = "src/anthropic.rs"

 [dependencies]
 anyhow.workspace = true
+chrono.workspace = true
 futures.workspace = true
 http_client.workspace = true
 isahc.workspace = true
@@ -25,6 +26,7 @@ serde.workspace = true
 serde_json.workspace = true
 strum.workspace = true
 thiserror.workspace = true
+util.workspace = true

 [dev-dependencies]
 tokio.workspace = true
--- a/crates/anthropic/src/anthropic.rs
+++ b/crates/anthropic/src/anthropic.rs
@@ -1,14 +1,17 @@
 mod supported_countries;

 use anyhow::{anyhow, Context, Result};
+use chrono::{DateTime, Utc};
 use futures::{io::BufReader, stream::BoxStream, AsyncBufReadExt, AsyncReadExt, Stream, StreamExt};
 use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
 use isahc::config::Configurable;
+use isahc::http::{HeaderMap, HeaderValue};
 use serde::{Deserialize, Serialize};
 use std::time::Duration;
 use std::{pin::Pin, str::FromStr};
 use strum::{EnumIter, EnumString};
 use thiserror::Error;
+use util::ResultExt as _;

 pub use supported_countries::*;

@@ -195,6 +198,66 @@ pub async fn stream_completion(
    request: Request,
    low_speed_timeout: Option<Duration>,
 ) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
+    stream_completion_with_rate_limit_info(client, api_url, api_key, request, low_speed_timeout)
+        .await
+        .map(|output| output.0)
+}
+
+/// https://docs.anthropic.com/en/api/rate-limits#response-headers
+#[derive(Debug)]
+pub struct RateLimitInfo {
+    pub requests_limit: usize,
+    pub requests_remaining: usize,
+    pub requests_reset: DateTime<Utc>,
+    pub tokens_limit: usize,
+    pub tokens_remaining: usize,
+    pub tokens_reset: DateTime<Utc>,
+}
+
+impl RateLimitInfo {
+    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Result<Self> {
+        let tokens_limit = get_header("anthropic-ratelimit-tokens-limit", headers)?.parse()?;
+        let requests_limit = get_header("anthropic-ratelimit-requests-limit", headers)?.parse()?;
+        let tokens_remaining =
+            get_header("anthropic-ratelimit-tokens-remaining", headers)?.parse()?;
+        let requests_remaining =
+            get_header("anthropic-ratelimit-requests-remaining", headers)?.parse()?;
+        let requests_reset = get_header("anthropic-ratelimit-requests-reset", headers)?;
+        let tokens_reset = get_header("anthropic-ratelimit-tokens-reset", headers)?;
+        let requests_reset = DateTime::parse_from_rfc3339(requests_reset)?.to_utc();
+        let tokens_reset = DateTime::parse_from_rfc3339(tokens_reset)?.to_utc();
+
+        Ok(Self {
+            requests_limit,
+            tokens_limit,
+            requests_remaining,
+            tokens_remaining,
+            requests_reset,
+            tokens_reset,
+        })
+    }
+}
+
+fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> Result<&'a str, anyhow::Error> {
+    Ok(headers
+        .get(key)
+        .ok_or_else(|| anyhow!("missing header `{key}`"))?
+        .to_str()?)
+}
+
+pub async fn stream_completion_with_rate_limit_info(
+    client: &dyn HttpClient,
+    api_url: &str,
+    api_key: &str,
+    request: Request,
+    low_speed_timeout: Option<Duration>,
+) -> Result<
+    (
+        BoxStream<'static, Result<Event, AnthropicError>>,
+        Option<RateLimitInfo>,
+    ),
+    AnthropicError,
+> {
    let request = StreamingRequest {
        base: request,
        stream: true,
@@ -224,8 +287,9 @@ pub async fn stream_completion(
        .await
        .context("failed to send request to Anthropic")?;
    if response.status().is_success() {
+        let rate_limits = RateLimitInfo::from_headers(response.headers());
        let reader = BufReader::new(response.into_body());
-        Ok(reader
+        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
@@ -239,7 +303,8 @@ pub async fn stream_completion(
                    Err(error) => Some(Err(AnthropicError::Other(anyhow!(error)))),
                }
            })
-            .boxed())
+            .boxed();
+        Ok((stream, rate_limits.log_err()))
    } else {
        let mut body = Vec::new();
        response