collab: Capture upstream input/output rate limits from Anthropic (#28106)
This PR captures the upstream rate limit information that Anthropic reports for input and output tokens.

Release Notes:

- N/A
This commit is contained in:
parent
183f57f318
commit
1a899fda60
2 changed files with 51 additions and 31 deletions
|
@ -321,38 +321,54 @@ pub async fn stream_completion(
|
||||||
.map(|output| output.0)
|
.map(|output| output.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An individual rate limit.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct RateLimit {
|
||||||
|
pub limit: usize,
|
||||||
|
pub remaining: usize,
|
||||||
|
pub reset: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RateLimit {
|
||||||
|
fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
|
||||||
|
let limit =
|
||||||
|
get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
|
||||||
|
let remaining = get_header(
|
||||||
|
&format!("anthropic-ratelimit-{resource}-remaining"),
|
||||||
|
headers,
|
||||||
|
)?
|
||||||
|
.parse()?;
|
||||||
|
let reset = DateTime::parse_from_rfc3339(get_header(
|
||||||
|
&format!("anthropic-ratelimit-{resource}-reset"),
|
||||||
|
headers,
|
||||||
|
)?)?
|
||||||
|
.to_utc();
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
limit,
|
||||||
|
remaining,
|
||||||
|
reset,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
|
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct RateLimitInfo {
|
pub struct RateLimitInfo {
|
||||||
pub requests_limit: usize,
|
pub requests: Option<RateLimit>,
|
||||||
pub requests_remaining: usize,
|
pub tokens: Option<RateLimit>,
|
||||||
pub requests_reset: DateTime<Utc>,
|
pub input_tokens: Option<RateLimit>,
|
||||||
pub tokens_limit: usize,
|
pub output_tokens: Option<RateLimit>,
|
||||||
pub tokens_remaining: usize,
|
|
||||||
pub tokens_reset: DateTime<Utc>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RateLimitInfo {
|
impl RateLimitInfo {
|
||||||
fn from_headers(headers: &HeaderMap<HeaderValue>) -> Result<Self> {
|
fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
|
||||||
let tokens_limit = get_header("anthropic-ratelimit-tokens-limit", headers)?.parse()?;
|
Self {
|
||||||
let requests_limit = get_header("anthropic-ratelimit-requests-limit", headers)?.parse()?;
|
requests: RateLimit::from_headers("requests", headers).log_err(),
|
||||||
let tokens_remaining =
|
tokens: RateLimit::from_headers("tokens", headers).log_err(),
|
||||||
get_header("anthropic-ratelimit-tokens-remaining", headers)?.parse()?;
|
input_tokens: RateLimit::from_headers("input-tokens", headers).log_err(),
|
||||||
let requests_remaining =
|
output_tokens: RateLimit::from_headers("output-tokens", headers).log_err(),
|
||||||
get_header("anthropic-ratelimit-requests-remaining", headers)?.parse()?;
|
}
|
||||||
let requests_reset = get_header("anthropic-ratelimit-requests-reset", headers)?;
|
|
||||||
let tokens_reset = get_header("anthropic-ratelimit-tokens-reset", headers)?;
|
|
||||||
let requests_reset = DateTime::parse_from_rfc3339(requests_reset)?.to_utc();
|
|
||||||
let tokens_reset = DateTime::parse_from_rfc3339(tokens_reset)?.to_utc();
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
requests_limit,
|
|
||||||
tokens_limit,
|
|
||||||
requests_remaining,
|
|
||||||
tokens_remaining,
|
|
||||||
requests_reset,
|
|
||||||
tokens_reset,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -418,7 +434,7 @@ pub async fn stream_completion_with_rate_limit_info(
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.boxed();
|
.boxed();
|
||||||
Ok((stream, rate_limits.log_err()))
|
Ok((stream, Some(rate_limits)))
|
||||||
} else {
|
} else {
|
||||||
let mut body = Vec::new();
|
let mut body = Vec::new();
|
||||||
response
|
response
|
||||||
|
|
|
@ -316,10 +316,14 @@ async fn perform_completion(
|
||||||
is_staff = claims.is_staff,
|
is_staff = claims.is_staff,
|
||||||
provider = params.provider.to_string(),
|
provider = params.provider.to_string(),
|
||||||
model = model,
|
model = model,
|
||||||
tokens_remaining = rate_limit_info.tokens_remaining,
|
tokens_remaining = rate_limit_info.tokens.as_ref().map(|limits| limits.remaining),
|
||||||
requests_remaining = rate_limit_info.requests_remaining,
|
input_tokens_remaining = rate_limit_info.input_tokens.as_ref().map(|limits| limits.remaining),
|
||||||
requests_reset = ?rate_limit_info.requests_reset,
|
output_tokens_remaining = rate_limit_info.output_tokens.as_ref().map(|limits| limits.remaining),
|
||||||
tokens_reset = ?rate_limit_info.tokens_reset,
|
requests_remaining = rate_limit_info.requests.as_ref().map(|limits| limits.remaining),
|
||||||
|
requests_reset = ?rate_limit_info.requests.as_ref().map(|limits| limits.reset),
|
||||||
|
tokens_reset = ?rate_limit_info.tokens.as_ref().map(|limits| limits.reset),
|
||||||
|
input_tokens_reset = ?rate_limit_info.input_tokens.as_ref().map(|limits| limits.reset),
|
||||||
|
output_tokens_reset = ?rate_limit_info.output_tokens.as_ref().map(|limits| limits.reset),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue