Apply rate limits in LLM service (#15997)

Release Notes:

- N/A

---------

Co-authored-by: Marshall <marshall@zed.dev>
Co-authored-by: Marshall Bowers <elliott.codes@gmail.com>
This commit is contained in:
Max Brunsfeld 2024-08-08 15:46:33 -07:00 committed by GitHub
parent 2bc503771b
commit 06625bfe94
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 983 additions and 227 deletions

View file

@ -1,24 +1,120 @@
use std::sync::Arc;
use crate::{
llm::db::{queries::providers::ModelRateLimits, queries::usages::Usage, LlmDatabase},
test_llm_db,
};
use chrono::{Duration, Utc};
use pretty_assertions::assert_eq;
use rpc::LanguageModelProvider;
use crate::llm::db::LlmDatabase;
use crate::test_both_llm_dbs;
// Hands the `test_tracking_usage` function defined below to the `test_llm_db!`
// macro. The second identifier is presumably the name of the generated
// Postgres-backed test; the macro definition is not visible here — confirm.
test_llm_db!(test_tracking_usage, test_tracking_usage_postgres);
// NOTE(review): this invocation refers to `test_find_or_create_usage`, the
// older test name. This text looks like a rendered diff with its +/- markers
// stripped, so this is likely a removed line that sits next to its
// replacement above — confirm against the real file.
test_both_llm_dbs!(
test_find_or_create_usage,
test_find_or_create_usage_postgres,
test_find_or_create_usage_sqlite
);
// Exercises usage accounting on `LlmDatabase`: records token usage for one
// user/provider/model at several timestamps with `record_usage`, then checks
// the per-minute, per-day and per-month figures returned by `get_usage`.
//
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped, so removed and added lines are interleaved. Lines flagged below
// with NOTE(review) look like pre-commit leftovers and would not compile in
// place — confirm against the real file.
async fn test_tracking_usage(db: &mut LlmDatabase) {
// Provider and model name reused by every call in this test.
let provider = LanguageModelProvider::Anthropic;
let model = "claude-3-5-sonnet";
// NOTE(review): second function header with the old test name and an
// `&Arc<LlmDatabase>` parameter — likely a removed diff line; confirm.
async fn test_find_or_create_usage(db: &Arc<LlmDatabase>) {
// NOTE(review): likely the removed predecessor of the `initialize()` call
// on the next line; confirm.
db.initialize_providers().await.unwrap();
db.initialize().await.unwrap();
// Insert the model under test together with its rate limits.
db.insert_models(&[(
provider,
model.to_string(),
ModelRateLimits {
max_requests_per_minute: 5,
max_tokens_per_minute: 10_000,
max_tokens_per_day: 50_000,
},
)])
.await
.unwrap();
// NOTE(review): dangling expression (no `.await`/`;` of its own) that calls
// `find_or_create_usage` — likely removed diff lines from the old test; confirm.
let usage = db
.find_or_create_usage(123, LanguageModelProvider::Anthropic, "claude-3-5-sonnet")
// `t0` is the reference instant; every later timestamp is an offset from it.
let t0 = Utc::now();
let user_id = 123;
// First record: 1000 at t0 (the value shows up in the token totals asserted below).
let now = t0;
db.record_usage(user_id, provider, model, 1000, now)
.await
.unwrap();
// NOTE(review): asserts on `usage.user_id`, which only makes sense for the
// old `find_or_create_usage` result — likely a removed diff line; confirm.
assert_eq!(usage.user_id, 123);
// Second record: 2000 at t0 + 10s.
let now = t0 + Duration::seconds(10);
db.record_usage(user_id, provider, model, 2000, now)
.await
.unwrap();
// Queried at t0 + 10s: both records are expected in every figure
// (2 requests, 1000 + 2000 = 3000 tokens).
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
usage,
Usage {
requests_this_minute: 2,
tokens_this_minute: 3000,
tokens_this_day: 3000,
tokens_this_month: 3000,
}
);
// Queried at t0 + 60s: the per-minute figures are expected to no longer
// include the record made at t0 (1 request, 2000 tokens), while the day and
// month totals still include both records.
let now = t0 + Duration::seconds(60);
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
usage,
Usage {
requests_this_minute: 1,
tokens_this_minute: 2000,
tokens_this_day: 3000,
tokens_this_month: 3000,
}
);
// Third record: 3000 at t0 + 60s. Expected per-minute figures become
// 2 requests / 2000 + 3000 = 5000 tokens; day and month become 6000.
let now = t0 + Duration::seconds(60);
db.record_usage(user_id, provider, model, 3000, now)
.await
.unwrap();
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
usage,
Usage {
requests_this_minute: 2,
tokens_this_minute: 5000,
tokens_this_day: 6000,
tokens_this_month: 6000,
}
);
// Queried at t0 + 24h: per-minute figures are expected to be zero, and the
// daily total is expected to omit the 1000 recorded at t0
// (2000 + 3000 = 5000); the monthly total still includes everything.
let t1 = t0 + Duration::hours(24);
let now = t1;
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
usage,
Usage {
requests_this_minute: 0,
tokens_this_minute: 0,
tokens_this_day: 5000,
tokens_this_month: 6000,
}
);
// Fourth record: 4000 at t0 + 24h. Expected: 1 request / 4000 tokens this
// minute, 5000 + 4000 = 9000 this day, 6000 + 4000 = 10000 this month.
db.record_usage(user_id, provider, model, 4000, now)
.await
.unwrap();
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
usage,
Usage {
requests_this_minute: 1,
tokens_this_minute: 4000,
tokens_this_day: 9000,
tokens_this_month: 10000,
}
);
// Queried at t0 + 30 days: per-minute and per-day figures are expected to be
// zero, and the monthly total is expected to omit the 1000 recorded at t0
// (2000 + 3000 + 4000 = 9000).
let t2 = t0 + Duration::days(30);
let now = t2;
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
usage,
Usage {
requests_this_minute: 0,
tokens_this_minute: 0,
tokens_this_day: 0,
tokens_this_month: 9000,
}
);
}