collab: Adjust rate-limiting measures for Claude 3.7 Sonnet (#28111)

This PR updates the usage measures used for rate limiting when using
Claude 3.7 Sonnet.

Instead of using the combined `tokens_per_minute` measure, we now
rate-limit individually on `input_tokens_per_minute` (which excludes
cache reads) and `output_tokens_per_minute`.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2025-04-04 13:37:24 -04:00 committed by GitHub
parent 02a8ece074
commit 558d61b907
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -514,23 +514,47 @@ async fn check_usage_limit(
.get_usage(user_id, provider, model_name, Utc::now()) .get_usage(user_id, provider, model_name, Utc::now())
.await?; .await?;
let checks = [ let checks = match (provider, model_name) {
( (LanguageModelProvider::Anthropic, "claude-3-7-sonnet") => vec![
usage.requests_this_minute, (
per_user_max_requests_per_minute, usage.requests_this_minute,
UsageMeasure::RequestsPerMinute, per_user_max_requests_per_minute,
), UsageMeasure::RequestsPerMinute,
( ),
usage.tokens_this_minute, (
per_user_max_tokens_per_minute, usage.input_tokens_this_minute,
UsageMeasure::TokensPerMinute, per_user_max_tokens_per_minute,
), UsageMeasure::InputTokensPerMinute,
( ),
usage.tokens_this_day, (
per_user_max_tokens_per_day, usage.output_tokens_this_minute,
UsageMeasure::TokensPerDay, per_user_max_tokens_per_minute,
), UsageMeasure::OutputTokensPerMinute,
]; ),
(
usage.tokens_this_day,
per_user_max_tokens_per_day,
UsageMeasure::TokensPerDay,
),
],
_ => vec![
(
usage.requests_this_minute,
per_user_max_requests_per_minute,
UsageMeasure::RequestsPerMinute,
),
(
usage.tokens_this_minute,
per_user_max_tokens_per_minute,
UsageMeasure::TokensPerMinute,
),
(
usage.tokens_this_day,
per_user_max_tokens_per_day,
UsageMeasure::TokensPerDay,
),
],
};
for (used, limit, usage_measure) in checks { for (used, limit, usage_measure) in checks {
if used > limit { if used > limit {