collab: Adjust rate-limiting measures for Claude 3.7 Sonnet (#28111)
This PR updates the usage measures used for rate limiting when using Claude 3.7 Sonnet. Instead of using the combined `tokens_per_minute` measure, we now rate limit individually on `input_tokens_per_minute` (which excludes cache reads) and `output_tokens_per_minute`. Release Notes: - N/A
This commit is contained in:
parent
02a8ece074
commit
558d61b907
1 changed file with 41 additions and 17 deletions
|
@ -514,7 +514,30 @@ async fn check_usage_limit(
|
|||
.get_usage(user_id, provider, model_name, Utc::now())
|
||||
.await?;
|
||||
|
||||
let checks = [
|
||||
let checks = match (provider, model_name) {
|
||||
(LanguageModelProvider::Anthropic, "claude-3-7-sonnet") => vec![
|
||||
(
|
||||
usage.requests_this_minute,
|
||||
per_user_max_requests_per_minute,
|
||||
UsageMeasure::RequestsPerMinute,
|
||||
),
|
||||
(
|
||||
usage.input_tokens_this_minute,
|
||||
per_user_max_tokens_per_minute,
|
||||
UsageMeasure::InputTokensPerMinute,
|
||||
),
|
||||
(
|
||||
usage.output_tokens_this_minute,
|
||||
per_user_max_tokens_per_minute,
|
||||
UsageMeasure::OutputTokensPerMinute,
|
||||
),
|
||||
(
|
||||
usage.tokens_this_day,
|
||||
per_user_max_tokens_per_day,
|
||||
UsageMeasure::TokensPerDay,
|
||||
),
|
||||
],
|
||||
_ => vec![
|
||||
(
|
||||
usage.requests_this_minute,
|
||||
per_user_max_requests_per_minute,
|
||||
|
@ -530,7 +553,8 @@ async fn check_usage_limit(
|
|||
per_user_max_tokens_per_day,
|
||||
UsageMeasure::TokensPerDay,
|
||||
),
|
||||
];
|
||||
],
|
||||
};
|
||||
|
||||
for (used, limit, usage_measure) in checks {
|
||||
if used > limit {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue