collab: Adjust rate-limiting measures for Claude 3.7 Sonnet (#28111)
This PR updates the usage measures used for rate limiting when using Claude 3.7 Sonnet. Instead of using the combined `tokens_per_minute` measure, we now rate limit individually on `input_tokens_per_minute` (which excludes cache reads) and `output_tokens_per_minute`. Release Notes: - N/A
This commit is contained in:
parent
02a8ece074
commit
558d61b907
1 changed file with 41 additions and 17 deletions
|
@@ -514,23 +514,47 @@ async fn check_usage_limit(
|
||||||
.get_usage(user_id, provider, model_name, Utc::now())
|
.get_usage(user_id, provider, model_name, Utc::now())
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let checks = [
|
let checks = match (provider, model_name) {
|
||||||
(
|
(LanguageModelProvider::Anthropic, "claude-3-7-sonnet") => vec![
|
||||||
usage.requests_this_minute,
|
(
|
||||||
per_user_max_requests_per_minute,
|
usage.requests_this_minute,
|
||||||
UsageMeasure::RequestsPerMinute,
|
per_user_max_requests_per_minute,
|
||||||
),
|
UsageMeasure::RequestsPerMinute,
|
||||||
(
|
),
|
||||||
usage.tokens_this_minute,
|
(
|
||||||
per_user_max_tokens_per_minute,
|
usage.input_tokens_this_minute,
|
||||||
UsageMeasure::TokensPerMinute,
|
per_user_max_tokens_per_minute,
|
||||||
),
|
UsageMeasure::InputTokensPerMinute,
|
||||||
(
|
),
|
||||||
usage.tokens_this_day,
|
(
|
||||||
per_user_max_tokens_per_day,
|
usage.output_tokens_this_minute,
|
||||||
UsageMeasure::TokensPerDay,
|
per_user_max_tokens_per_minute,
|
||||||
),
|
UsageMeasure::OutputTokensPerMinute,
|
||||||
];
|
),
|
||||||
|
(
|
||||||
|
usage.tokens_this_day,
|
||||||
|
per_user_max_tokens_per_day,
|
||||||
|
UsageMeasure::TokensPerDay,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
_ => vec![
|
||||||
|
(
|
||||||
|
usage.requests_this_minute,
|
||||||
|
per_user_max_requests_per_minute,
|
||||||
|
UsageMeasure::RequestsPerMinute,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
usage.tokens_this_minute,
|
||||||
|
per_user_max_tokens_per_minute,
|
||||||
|
UsageMeasure::TokensPerMinute,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
usage.tokens_this_day,
|
||||||
|
per_user_max_tokens_per_day,
|
||||||
|
UsageMeasure::TokensPerDay,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
for (used, limit, usage_measure) in checks {
|
for (used, limit, usage_measure) in checks {
|
||||||
if used > limit {
|
if used > limit {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue