Increase rate limits for computing embeddings (#11326)

- Also, remove the rate limit for getting cached embeddings entirely.

Release Notes:

- N/A
This commit is contained in:
Nathan Sobo 2024-05-02 16:36:45 -07:00 committed by GitHub
parent 47b38a0428
commit b58bf64f0a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -4501,7 +4501,7 @@ impl RateLimit for ComputeEmbeddingsRateLimit {
std::env::var("EMBED_TEXTS_RATE_LIMIT_PER_HOUR") std::env::var("EMBED_TEXTS_RATE_LIMIT_PER_HOUR")
.ok() .ok()
.and_then(|v| v.parse().ok()) .and_then(|v| v.parse().ok())
.unwrap_or(120) // Picked arbitrarily .unwrap_or(5000) // Picked arbitrarily
} }
fn refill_duration() -> chrono::Duration { fn refill_duration() -> chrono::Duration {
@@ -4573,25 +4573,6 @@ async fn compute_embeddings(
Ok(()) Ok(())
} }
/// Marker type selecting the rate-limit policy for cached-embedding lookups.
///
/// It carries no data; the policy is expressed entirely through its
/// [`RateLimit`] implementation.
struct GetCachedEmbeddingsRateLimit;

impl RateLimit for GetCachedEmbeddingsRateLimit {
    /// Returns the capacity of this limit.
    ///
    /// The value is read from the `EMBED_TEXTS_RATE_LIMIT_PER_HOUR` environment
    /// variable. When the variable is unset, unreadable, or does not parse as a
    /// `usize`, the default of 120 is returned.
    ///
    /// NOTE(review): the variable name mentions "embed texts" rather than
    /// cached embeddings — confirm whether sharing it is intentional.
    fn capacity() -> usize {
        const DEFAULT_CAPACITY: usize = 120; // Picked arbitrarily

        match std::env::var("EMBED_TEXTS_RATE_LIMIT_PER_HOUR") {
            Ok(raw) => raw.parse::<usize>().unwrap_or(DEFAULT_CAPACITY),
            Err(_) => DEFAULT_CAPACITY,
        }
    }

    /// Returns the refill duration for this limit: one hour.
    fn refill_duration() -> chrono::Duration {
        let one_hour = chrono::Duration::hours(1);
        one_hour
    }

    /// Returns the name identifying this limit.
    ///
    /// Presumably the key under which the limiter stores its state — confirm
    /// against the `RateLimit` trait definition.
    fn db_name() -> &'static str {
        const NAME: &str = "get-cached-embeddings";
        NAME
    }
}
async fn get_cached_embeddings( async fn get_cached_embeddings(
request: proto::GetCachedEmbeddings, request: proto::GetCachedEmbeddings,
response: Response<proto::GetCachedEmbeddings>, response: Response<proto::GetCachedEmbeddings>,
@@ -4599,11 +4580,6 @@ async fn get_cached_embeddings(
) -> Result<()> { ) -> Result<()> {
authorize_access_to_language_models(&session).await?; authorize_access_to_language_models(&session).await?;
session
.rate_limiter
.check::<GetCachedEmbeddingsRateLimit>(session.user_id())
.await?;
let db = session.db().await; let db = session.db().await;
let embeddings = db.get_embeddings(&request.model, &request.digests).await?; let embeddings = db.get_embeddings(&request.model, &request.digests).await?;