Add tracing needed for LLM rate limit dashboards (#16388)

Release Notes: - N/A --------- Co-authored-by: Marshall <marshall@zed.dev>
2024-08-16 14:52:31 -07:00 · 2024-08-16 14:52:31 -07:00 · 1b1070e0f7
commit 1b1070e0f7
parent 9ef3306f55
6 changed files with 227 additions and 29 deletions
--- a/crates/collab/src/llm.rs
+++ b/crates/collab/src/llm.rs
@ -217,7 +217,7 @@ async fn perform_completion(
                _ => request.model,
            };

-            let chunks = anthropic::stream_completion(
+            let (chunks, rate_limit_info) = anthropic::stream_completion_with_rate_limit_info(
                &state.http_client,
                anthropic::ANTHROPIC_API_URL,
                api_key,
@ -245,6 +245,18 @@ async fn perform_completion(
                anthropic::AnthropicError::Other(err) => Error::Internal(err),
            })?;

+            if let Some(rate_limit_info) = rate_limit_info {
+                tracing::info!(
+                    target: "upstream rate limit",
+                    provider = params.provider.to_string(),
+                    model = model,
+                    tokens_remaining = rate_limit_info.tokens_remaining,
+                    requests_remaining = rate_limit_info.requests_remaining,
+                    requests_reset = ?rate_limit_info.requests_reset,
+                    tokens_reset = ?rate_limit_info.tokens_reset,
+                );
+            }
+
            chunks
                .map(move |event| {
                    let chunk = event?;
@ -540,33 +552,74 @@ impl<S> Drop for TokenCountingStream<S> {
                .await
                .log_err();

-            if let Some((clickhouse_client, usage)) = state.clickhouse_client.as_ref().zip(usage) {
-                report_llm_usage(
-                    clickhouse_client,
-                    LlmUsageEventRow {
-                        time: Utc::now().timestamp_millis(),
-                        user_id: claims.user_id as i32,
-                        is_staff: claims.is_staff,
-                        plan: match claims.plan {
-                            Plan::Free => "free".to_string(),
-                            Plan::ZedPro => "zed_pro".to_string(),
+            if let Some(usage) = usage {
+                tracing::info!(
+                    target: "user usage",
+                    user_id = claims.user_id,
+                    login = claims.github_user_login,
+                    authn.jti = claims.jti,
+                    requests_this_minute = usage.requests_this_minute,
+                    tokens_this_minute = usage.tokens_this_minute,
+                );
+
+                if let Some(clickhouse_client) = state.clickhouse_client.as_ref() {
+                    report_llm_usage(
+                        clickhouse_client,
+                        LlmUsageEventRow {
+                            time: Utc::now().timestamp_millis(),
+                            user_id: claims.user_id as i32,
+                            is_staff: claims.is_staff,
+                            plan: match claims.plan {
+                                Plan::Free => "free".to_string(),
+                                Plan::ZedPro => "zed_pro".to_string(),
+                            },
+                            model,
+                            provider: provider.to_string(),
+                            input_token_count: input_token_count as u64,
+                            output_token_count: output_token_count as u64,
+                            requests_this_minute: usage.requests_this_minute as u64,
+                            tokens_this_minute: usage.tokens_this_minute as u64,
+                            tokens_this_day: usage.tokens_this_day as u64,
+                            input_tokens_this_month: usage.input_tokens_this_month as u64,
+                            output_tokens_this_month: usage.output_tokens_this_month as u64,
+                            spending_this_month: usage.spending_this_month as u64,
+                            lifetime_spending: usage.lifetime_spending as u64,
                        },
-                        model,
-                        provider: provider.to_string(),
-                        input_token_count: input_token_count as u64,
-                        output_token_count: output_token_count as u64,
-                        requests_this_minute: usage.requests_this_minute as u64,
-                        tokens_this_minute: usage.tokens_this_minute as u64,
-                        tokens_this_day: usage.tokens_this_day as u64,
-                        input_tokens_this_month: usage.input_tokens_this_month as u64,
-                        output_tokens_this_month: usage.output_tokens_this_month as u64,
-                        spending_this_month: usage.spending_this_month as u64,
-                        lifetime_spending: usage.lifetime_spending as u64,
-                    },
-                )
-                .await
-                .log_err();
+                    )
+                    .await
+                    .log_err();
+                }
            }
        })
    }
 }
+
+pub fn log_usage_periodically(state: Arc<LlmState>) {
+    state.executor.clone().spawn_detached(async move {
+        loop {
+            state
+                .executor
+                .sleep(std::time::Duration::from_secs(30))
+                .await;
+
+            let Some(usages) = state
+                .db
+                .get_application_wide_usages_by_model(Utc::now())
+                .await
+                .log_err()
+            else {
+                continue;
+            };
+
+            for usage in usages {
+                tracing::info!(
+                    target: "computed usage",
+                    provider = usage.provider.to_string(),
+                    model = usage.model,
+                    requests_this_minute = usage.requests_this_minute,
+                    tokens_this_minute = usage.tokens_this_minute,
+                );
+            }
+        }
+    })
+}