collab: Track cache writes/reads in LLM usage (#18834)
This PR extends LLM usage tracking to also record cache writes and cache reads for Anthropic models.

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Antonio <antonio@zed.dev>
This commit is contained in:
parent
c5d252b837
commit
d55f025906
9 changed files with 241 additions and 39 deletions
|
@ -521,6 +521,10 @@ pub struct Usage {
|
||||||
pub input_tokens: Option<u32>,
|
pub input_tokens: Option<u32>,
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
pub output_tokens: Option<u32>,
|
pub output_tokens: Option<u32>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub cache_creation_input_tokens: Option<u32>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub cache_read_input_tokens: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
alter table models
|
||||||
|
add column price_per_million_cache_creation_input_tokens integer not null default 0,
|
||||||
|
add column price_per_million_cache_read_input_tokens integer not null default 0;
|
||||||
|
|
||||||
|
alter table usages
|
||||||
|
add column cache_creation_input_tokens_this_month bigint not null default 0,
|
||||||
|
add column cache_read_input_tokens_this_month bigint not null default 0;
|
||||||
|
|
||||||
|
alter table lifetime_usages
|
||||||
|
add column cache_creation_input_tokens bigint not null default 0,
|
||||||
|
add column cache_read_input_tokens bigint not null default 0;
|
|
@ -318,22 +318,31 @@ async fn perform_completion(
|
||||||
chunks
|
chunks
|
||||||
.map(move |event| {
|
.map(move |event| {
|
||||||
let chunk = event?;
|
let chunk = event?;
|
||||||
let (input_tokens, output_tokens) = match &chunk {
|
let (
|
||||||
|
input_tokens,
|
||||||
|
output_tokens,
|
||||||
|
cache_creation_input_tokens,
|
||||||
|
cache_read_input_tokens,
|
||||||
|
) = match &chunk {
|
||||||
anthropic::Event::MessageStart {
|
anthropic::Event::MessageStart {
|
||||||
message: anthropic::Response { usage, .. },
|
message: anthropic::Response { usage, .. },
|
||||||
}
|
}
|
||||||
| anthropic::Event::MessageDelta { usage, .. } => (
|
| anthropic::Event::MessageDelta { usage, .. } => (
|
||||||
usage.input_tokens.unwrap_or(0) as usize,
|
usage.input_tokens.unwrap_or(0) as usize,
|
||||||
usage.output_tokens.unwrap_or(0) as usize,
|
usage.output_tokens.unwrap_or(0) as usize,
|
||||||
|
usage.cache_creation_input_tokens.unwrap_or(0) as usize,
|
||||||
|
usage.cache_read_input_tokens.unwrap_or(0) as usize,
|
||||||
),
|
),
|
||||||
_ => (0, 0),
|
_ => (0, 0, 0, 0),
|
||||||
};
|
};
|
||||||
|
|
||||||
anyhow::Ok((
|
anyhow::Ok(CompletionChunk {
|
||||||
serde_json::to_vec(&chunk).unwrap(),
|
bytes: serde_json::to_vec(&chunk).unwrap(),
|
||||||
input_tokens,
|
input_tokens,
|
||||||
output_tokens,
|
output_tokens,
|
||||||
))
|
cache_creation_input_tokens,
|
||||||
|
cache_read_input_tokens,
|
||||||
|
})
|
||||||
})
|
})
|
||||||
.boxed()
|
.boxed()
|
||||||
}
|
}
|
||||||
|
@ -359,11 +368,13 @@ async fn perform_completion(
|
||||||
chunk.usage.as_ref().map_or(0, |u| u.prompt_tokens) as usize;
|
chunk.usage.as_ref().map_or(0, |u| u.prompt_tokens) as usize;
|
||||||
let output_tokens =
|
let output_tokens =
|
||||||
chunk.usage.as_ref().map_or(0, |u| u.completion_tokens) as usize;
|
chunk.usage.as_ref().map_or(0, |u| u.completion_tokens) as usize;
|
||||||
(
|
CompletionChunk {
|
||||||
serde_json::to_vec(&chunk).unwrap(),
|
bytes: serde_json::to_vec(&chunk).unwrap(),
|
||||||
input_tokens,
|
input_tokens,
|
||||||
output_tokens,
|
output_tokens,
|
||||||
)
|
cache_creation_input_tokens: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.boxed()
|
.boxed()
|
||||||
|
@ -387,13 +398,13 @@ async fn perform_completion(
|
||||||
.map(|event| {
|
.map(|event| {
|
||||||
event.map(|chunk| {
|
event.map(|chunk| {
|
||||||
// TODO - implement token counting for Google AI
|
// TODO - implement token counting for Google AI
|
||||||
let input_tokens = 0;
|
CompletionChunk {
|
||||||
let output_tokens = 0;
|
bytes: serde_json::to_vec(&chunk).unwrap(),
|
||||||
(
|
input_tokens: 0,
|
||||||
serde_json::to_vec(&chunk).unwrap(),
|
output_tokens: 0,
|
||||||
input_tokens,
|
cache_creation_input_tokens: 0,
|
||||||
output_tokens,
|
cache_read_input_tokens: 0,
|
||||||
)
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.boxed()
|
.boxed()
|
||||||
|
@ -407,6 +418,8 @@ async fn perform_completion(
|
||||||
model,
|
model,
|
||||||
input_tokens: 0,
|
input_tokens: 0,
|
||||||
output_tokens: 0,
|
output_tokens: 0,
|
||||||
|
cache_creation_input_tokens: 0,
|
||||||
|
cache_read_input_tokens: 0,
|
||||||
inner_stream: stream,
|
inner_stream: stream,
|
||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
|
@ -551,6 +564,14 @@ async fn check_usage_limit(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct CompletionChunk {
|
||||||
|
bytes: Vec<u8>,
|
||||||
|
input_tokens: usize,
|
||||||
|
output_tokens: usize,
|
||||||
|
cache_creation_input_tokens: usize,
|
||||||
|
cache_read_input_tokens: usize,
|
||||||
|
}
|
||||||
|
|
||||||
struct TokenCountingStream<S> {
|
struct TokenCountingStream<S> {
|
||||||
state: Arc<LlmState>,
|
state: Arc<LlmState>,
|
||||||
claims: LlmTokenClaims,
|
claims: LlmTokenClaims,
|
||||||
|
@ -558,22 +579,26 @@ struct TokenCountingStream<S> {
|
||||||
model: String,
|
model: String,
|
||||||
input_tokens: usize,
|
input_tokens: usize,
|
||||||
output_tokens: usize,
|
output_tokens: usize,
|
||||||
|
cache_creation_input_tokens: usize,
|
||||||
|
cache_read_input_tokens: usize,
|
||||||
inner_stream: S,
|
inner_stream: S,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<S> Stream for TokenCountingStream<S>
|
impl<S> Stream for TokenCountingStream<S>
|
||||||
where
|
where
|
||||||
S: Stream<Item = Result<(Vec<u8>, usize, usize), anyhow::Error>> + Unpin,
|
S: Stream<Item = Result<CompletionChunk, anyhow::Error>> + Unpin,
|
||||||
{
|
{
|
||||||
type Item = Result<Vec<u8>, anyhow::Error>;
|
type Item = Result<Vec<u8>, anyhow::Error>;
|
||||||
|
|
||||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||||
match Pin::new(&mut self.inner_stream).poll_next(cx) {
|
match Pin::new(&mut self.inner_stream).poll_next(cx) {
|
||||||
Poll::Ready(Some(Ok((mut bytes, input_tokens, output_tokens)))) => {
|
Poll::Ready(Some(Ok(mut chunk))) => {
|
||||||
bytes.push(b'\n');
|
chunk.bytes.push(b'\n');
|
||||||
self.input_tokens += input_tokens;
|
self.input_tokens += chunk.input_tokens;
|
||||||
self.output_tokens += output_tokens;
|
self.output_tokens += chunk.output_tokens;
|
||||||
Poll::Ready(Some(Ok(bytes)))
|
self.cache_creation_input_tokens += chunk.cache_creation_input_tokens;
|
||||||
|
self.cache_read_input_tokens += chunk.cache_read_input_tokens;
|
||||||
|
Poll::Ready(Some(Ok(chunk.bytes)))
|
||||||
}
|
}
|
||||||
Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))),
|
Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))),
|
||||||
Poll::Ready(None) => Poll::Ready(None),
|
Poll::Ready(None) => Poll::Ready(None),
|
||||||
|
@ -590,6 +615,8 @@ impl<S> Drop for TokenCountingStream<S> {
|
||||||
let model = std::mem::take(&mut self.model);
|
let model = std::mem::take(&mut self.model);
|
||||||
let input_token_count = self.input_tokens;
|
let input_token_count = self.input_tokens;
|
||||||
let output_token_count = self.output_tokens;
|
let output_token_count = self.output_tokens;
|
||||||
|
let cache_creation_input_token_count = self.cache_creation_input_tokens;
|
||||||
|
let cache_read_input_token_count = self.cache_read_input_tokens;
|
||||||
self.state.executor.spawn_detached(async move {
|
self.state.executor.spawn_detached(async move {
|
||||||
let usage = state
|
let usage = state
|
||||||
.db
|
.db
|
||||||
|
@ -599,6 +626,8 @@ impl<S> Drop for TokenCountingStream<S> {
|
||||||
provider,
|
provider,
|
||||||
&model,
|
&model,
|
||||||
input_token_count,
|
input_token_count,
|
||||||
|
cache_creation_input_token_count,
|
||||||
|
cache_read_input_token_count,
|
||||||
output_token_count,
|
output_token_count,
|
||||||
Utc::now(),
|
Utc::now(),
|
||||||
)
|
)
|
||||||
|
@ -630,11 +659,20 @@ impl<S> Drop for TokenCountingStream<S> {
|
||||||
model,
|
model,
|
||||||
provider: provider.to_string(),
|
provider: provider.to_string(),
|
||||||
input_token_count: input_token_count as u64,
|
input_token_count: input_token_count as u64,
|
||||||
|
cache_creation_input_token_count: cache_creation_input_token_count
|
||||||
|
as u64,
|
||||||
|
cache_read_input_token_count: cache_read_input_token_count as u64,
|
||||||
output_token_count: output_token_count as u64,
|
output_token_count: output_token_count as u64,
|
||||||
requests_this_minute: usage.requests_this_minute as u64,
|
requests_this_minute: usage.requests_this_minute as u64,
|
||||||
tokens_this_minute: usage.tokens_this_minute as u64,
|
tokens_this_minute: usage.tokens_this_minute as u64,
|
||||||
tokens_this_day: usage.tokens_this_day as u64,
|
tokens_this_day: usage.tokens_this_day as u64,
|
||||||
input_tokens_this_month: usage.input_tokens_this_month as u64,
|
input_tokens_this_month: usage.input_tokens_this_month as u64,
|
||||||
|
cache_creation_input_tokens_this_month: usage
|
||||||
|
.cache_creation_input_tokens_this_month
|
||||||
|
as u64,
|
||||||
|
cache_read_input_tokens_this_month: usage
|
||||||
|
.cache_read_input_tokens_this_month
|
||||||
|
as u64,
|
||||||
output_tokens_this_month: usage.output_tokens_this_month as u64,
|
output_tokens_this_month: usage.output_tokens_this_month as u64,
|
||||||
spending_this_month: usage.spending_this_month as u64,
|
spending_this_month: usage.spending_this_month as u64,
|
||||||
lifetime_spending: usage.lifetime_spending as u64,
|
lifetime_spending: usage.lifetime_spending as u64,
|
||||||
|
|
|
@ -14,6 +14,8 @@ pub struct Usage {
|
||||||
pub tokens_this_minute: usize,
|
pub tokens_this_minute: usize,
|
||||||
pub tokens_this_day: usize,
|
pub tokens_this_day: usize,
|
||||||
pub input_tokens_this_month: usize,
|
pub input_tokens_this_month: usize,
|
||||||
|
pub cache_creation_input_tokens_this_month: usize,
|
||||||
|
pub cache_read_input_tokens_this_month: usize,
|
||||||
pub output_tokens_this_month: usize,
|
pub output_tokens_this_month: usize,
|
||||||
pub spending_this_month: usize,
|
pub spending_this_month: usize,
|
||||||
pub lifetime_spending: usize,
|
pub lifetime_spending: usize,
|
||||||
|
@ -160,17 +162,14 @@ impl LlmDatabase {
|
||||||
.all(&*tx)
|
.all(&*tx)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let (lifetime_input_tokens, lifetime_output_tokens) = lifetime_usage::Entity::find()
|
let lifetime_usage = lifetime_usage::Entity::find()
|
||||||
.filter(
|
.filter(
|
||||||
lifetime_usage::Column::UserId
|
lifetime_usage::Column::UserId
|
||||||
.eq(user_id)
|
.eq(user_id)
|
||||||
.and(lifetime_usage::Column::ModelId.eq(model.id)),
|
.and(lifetime_usage::Column::ModelId.eq(model.id)),
|
||||||
)
|
)
|
||||||
.one(&*tx)
|
.one(&*tx)
|
||||||
.await?
|
.await?;
|
||||||
.map_or((0, 0), |usage| {
|
|
||||||
(usage.input_tokens as usize, usage.output_tokens as usize)
|
|
||||||
});
|
|
||||||
|
|
||||||
let requests_this_minute =
|
let requests_this_minute =
|
||||||
self.get_usage_for_measure(&usages, now, UsageMeasure::RequestsPerMinute)?;
|
self.get_usage_for_measure(&usages, now, UsageMeasure::RequestsPerMinute)?;
|
||||||
|
@ -180,18 +179,44 @@ impl LlmDatabase {
|
||||||
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerDay)?;
|
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerDay)?;
|
||||||
let input_tokens_this_month =
|
let input_tokens_this_month =
|
||||||
self.get_usage_for_measure(&usages, now, UsageMeasure::InputTokensPerMonth)?;
|
self.get_usage_for_measure(&usages, now, UsageMeasure::InputTokensPerMonth)?;
|
||||||
|
let cache_creation_input_tokens_this_month = self.get_usage_for_measure(
|
||||||
|
&usages,
|
||||||
|
now,
|
||||||
|
UsageMeasure::CacheCreationInputTokensPerMonth,
|
||||||
|
)?;
|
||||||
|
let cache_read_input_tokens_this_month = self.get_usage_for_measure(
|
||||||
|
&usages,
|
||||||
|
now,
|
||||||
|
UsageMeasure::CacheReadInputTokensPerMonth,
|
||||||
|
)?;
|
||||||
let output_tokens_this_month =
|
let output_tokens_this_month =
|
||||||
self.get_usage_for_measure(&usages, now, UsageMeasure::OutputTokensPerMonth)?;
|
self.get_usage_for_measure(&usages, now, UsageMeasure::OutputTokensPerMonth)?;
|
||||||
let spending_this_month =
|
let spending_this_month = calculate_spending(
|
||||||
calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
|
model,
|
||||||
let lifetime_spending =
|
input_tokens_this_month,
|
||||||
calculate_spending(model, lifetime_input_tokens, lifetime_output_tokens);
|
cache_creation_input_tokens_this_month,
|
||||||
|
cache_read_input_tokens_this_month,
|
||||||
|
output_tokens_this_month,
|
||||||
|
);
|
||||||
|
let lifetime_spending = if let Some(lifetime_usage) = lifetime_usage {
|
||||||
|
calculate_spending(
|
||||||
|
model,
|
||||||
|
lifetime_usage.input_tokens as usize,
|
||||||
|
lifetime_usage.cache_creation_input_tokens as usize,
|
||||||
|
lifetime_usage.cache_read_input_tokens as usize,
|
||||||
|
lifetime_usage.output_tokens as usize,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
Ok(Usage {
|
Ok(Usage {
|
||||||
requests_this_minute,
|
requests_this_minute,
|
||||||
tokens_this_minute,
|
tokens_this_minute,
|
||||||
tokens_this_day,
|
tokens_this_day,
|
||||||
input_tokens_this_month,
|
input_tokens_this_month,
|
||||||
|
cache_creation_input_tokens_this_month,
|
||||||
|
cache_read_input_tokens_this_month,
|
||||||
output_tokens_this_month,
|
output_tokens_this_month,
|
||||||
spending_this_month,
|
spending_this_month,
|
||||||
lifetime_spending,
|
lifetime_spending,
|
||||||
|
@ -208,6 +233,8 @@ impl LlmDatabase {
|
||||||
provider: LanguageModelProvider,
|
provider: LanguageModelProvider,
|
||||||
model_name: &str,
|
model_name: &str,
|
||||||
input_token_count: usize,
|
input_token_count: usize,
|
||||||
|
cache_creation_input_tokens: usize,
|
||||||
|
cache_read_input_tokens: usize,
|
||||||
output_token_count: usize,
|
output_token_count: usize,
|
||||||
now: DateTimeUtc,
|
now: DateTimeUtc,
|
||||||
) -> Result<Usage> {
|
) -> Result<Usage> {
|
||||||
|
@ -235,6 +262,10 @@ impl LlmDatabase {
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
let total_token_count = input_token_count
|
||||||
|
+ cache_read_input_tokens
|
||||||
|
+ cache_creation_input_tokens
|
||||||
|
+ output_token_count;
|
||||||
let tokens_this_minute = self
|
let tokens_this_minute = self
|
||||||
.update_usage_for_measure(
|
.update_usage_for_measure(
|
||||||
user_id,
|
user_id,
|
||||||
|
@ -243,7 +274,7 @@ impl LlmDatabase {
|
||||||
&usages,
|
&usages,
|
||||||
UsageMeasure::TokensPerMinute,
|
UsageMeasure::TokensPerMinute,
|
||||||
now,
|
now,
|
||||||
input_token_count + output_token_count,
|
total_token_count,
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
@ -255,7 +286,7 @@ impl LlmDatabase {
|
||||||
&usages,
|
&usages,
|
||||||
UsageMeasure::TokensPerDay,
|
UsageMeasure::TokensPerDay,
|
||||||
now,
|
now,
|
||||||
input_token_count + output_token_count,
|
total_token_count,
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
@ -271,6 +302,30 @@ impl LlmDatabase {
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
let cache_creation_input_tokens_this_month = self
|
||||||
|
.update_usage_for_measure(
|
||||||
|
user_id,
|
||||||
|
is_staff,
|
||||||
|
model.id,
|
||||||
|
&usages,
|
||||||
|
UsageMeasure::CacheCreationInputTokensPerMonth,
|
||||||
|
now,
|
||||||
|
cache_creation_input_tokens,
|
||||||
|
&tx,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
let cache_read_input_tokens_this_month = self
|
||||||
|
.update_usage_for_measure(
|
||||||
|
user_id,
|
||||||
|
is_staff,
|
||||||
|
model.id,
|
||||||
|
&usages,
|
||||||
|
UsageMeasure::CacheReadInputTokensPerMonth,
|
||||||
|
now,
|
||||||
|
cache_read_input_tokens,
|
||||||
|
&tx,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
let output_tokens_this_month = self
|
let output_tokens_this_month = self
|
||||||
.update_usage_for_measure(
|
.update_usage_for_measure(
|
||||||
user_id,
|
user_id,
|
||||||
|
@ -283,8 +338,13 @@ impl LlmDatabase {
|
||||||
&tx,
|
&tx,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
let spending_this_month =
|
let spending_this_month = calculate_spending(
|
||||||
calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
|
model,
|
||||||
|
input_tokens_this_month,
|
||||||
|
cache_creation_input_tokens_this_month,
|
||||||
|
cache_read_input_tokens_this_month,
|
||||||
|
output_tokens_this_month,
|
||||||
|
);
|
||||||
|
|
||||||
// Update lifetime usage
|
// Update lifetime usage
|
||||||
let lifetime_usage = lifetime_usage::Entity::find()
|
let lifetime_usage = lifetime_usage::Entity::find()
|
||||||
|
@ -303,6 +363,12 @@ impl LlmDatabase {
|
||||||
input_tokens: ActiveValue::set(
|
input_tokens: ActiveValue::set(
|
||||||
usage.input_tokens + input_token_count as i64,
|
usage.input_tokens + input_token_count as i64,
|
||||||
),
|
),
|
||||||
|
cache_creation_input_tokens: ActiveValue::set(
|
||||||
|
usage.cache_creation_input_tokens + cache_creation_input_tokens as i64,
|
||||||
|
),
|
||||||
|
cache_read_input_tokens: ActiveValue::set(
|
||||||
|
usage.cache_read_input_tokens + cache_read_input_tokens as i64,
|
||||||
|
),
|
||||||
output_tokens: ActiveValue::set(
|
output_tokens: ActiveValue::set(
|
||||||
usage.output_tokens + output_token_count as i64,
|
usage.output_tokens + output_token_count as i64,
|
||||||
),
|
),
|
||||||
|
@ -327,6 +393,8 @@ impl LlmDatabase {
|
||||||
let lifetime_spending = calculate_spending(
|
let lifetime_spending = calculate_spending(
|
||||||
model,
|
model,
|
||||||
lifetime_usage.input_tokens as usize,
|
lifetime_usage.input_tokens as usize,
|
||||||
|
lifetime_usage.cache_creation_input_tokens as usize,
|
||||||
|
lifetime_usage.cache_read_input_tokens as usize,
|
||||||
lifetime_usage.output_tokens as usize,
|
lifetime_usage.output_tokens as usize,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -335,6 +403,8 @@ impl LlmDatabase {
|
||||||
tokens_this_minute,
|
tokens_this_minute,
|
||||||
tokens_this_day,
|
tokens_this_day,
|
||||||
input_tokens_this_month,
|
input_tokens_this_month,
|
||||||
|
cache_creation_input_tokens_this_month,
|
||||||
|
cache_read_input_tokens_this_month,
|
||||||
output_tokens_this_month,
|
output_tokens_this_month,
|
||||||
spending_this_month,
|
spending_this_month,
|
||||||
lifetime_spending,
|
lifetime_spending,
|
||||||
|
@ -501,13 +571,24 @@ impl LlmDatabase {
|
||||||
fn calculate_spending(
|
fn calculate_spending(
|
||||||
model: &model::Model,
|
model: &model::Model,
|
||||||
input_tokens_this_month: usize,
|
input_tokens_this_month: usize,
|
||||||
|
cache_creation_input_tokens_this_month: usize,
|
||||||
|
cache_read_input_tokens_this_month: usize,
|
||||||
output_tokens_this_month: usize,
|
output_tokens_this_month: usize,
|
||||||
) -> usize {
|
) -> usize {
|
||||||
let input_token_cost =
|
let input_token_cost =
|
||||||
input_tokens_this_month * model.price_per_million_input_tokens as usize / 1_000_000;
|
input_tokens_this_month * model.price_per_million_input_tokens as usize / 1_000_000;
|
||||||
|
let cache_creation_input_token_cost = cache_creation_input_tokens_this_month
|
||||||
|
* model.price_per_million_cache_creation_input_tokens as usize
|
||||||
|
/ 1_000_000;
|
||||||
|
let cache_read_input_token_cost = cache_read_input_tokens_this_month
|
||||||
|
* model.price_per_million_cache_read_input_tokens as usize
|
||||||
|
/ 1_000_000;
|
||||||
let output_token_cost =
|
let output_token_cost =
|
||||||
output_tokens_this_month * model.price_per_million_output_tokens as usize / 1_000_000;
|
output_tokens_this_month * model.price_per_million_output_tokens as usize / 1_000_000;
|
||||||
input_token_cost + output_token_cost
|
input_token_cost
|
||||||
|
+ cache_creation_input_token_cost
|
||||||
|
+ cache_read_input_token_cost
|
||||||
|
+ output_token_cost
|
||||||
}
|
}
|
||||||
|
|
||||||
const MINUTE_BUCKET_COUNT: usize = 12;
|
const MINUTE_BUCKET_COUNT: usize = 12;
|
||||||
|
@ -521,6 +602,8 @@ impl UsageMeasure {
|
||||||
UsageMeasure::TokensPerMinute => MINUTE_BUCKET_COUNT,
|
UsageMeasure::TokensPerMinute => MINUTE_BUCKET_COUNT,
|
||||||
UsageMeasure::TokensPerDay => DAY_BUCKET_COUNT,
|
UsageMeasure::TokensPerDay => DAY_BUCKET_COUNT,
|
||||||
UsageMeasure::InputTokensPerMonth => MONTH_BUCKET_COUNT,
|
UsageMeasure::InputTokensPerMonth => MONTH_BUCKET_COUNT,
|
||||||
|
UsageMeasure::CacheCreationInputTokensPerMonth => MONTH_BUCKET_COUNT,
|
||||||
|
UsageMeasure::CacheReadInputTokensPerMonth => MONTH_BUCKET_COUNT,
|
||||||
UsageMeasure::OutputTokensPerMonth => MONTH_BUCKET_COUNT,
|
UsageMeasure::OutputTokensPerMonth => MONTH_BUCKET_COUNT,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -531,6 +614,8 @@ impl UsageMeasure {
|
||||||
UsageMeasure::TokensPerMinute => Duration::minutes(1),
|
UsageMeasure::TokensPerMinute => Duration::minutes(1),
|
||||||
UsageMeasure::TokensPerDay => Duration::hours(24),
|
UsageMeasure::TokensPerDay => Duration::hours(24),
|
||||||
UsageMeasure::InputTokensPerMonth => Duration::days(30),
|
UsageMeasure::InputTokensPerMonth => Duration::days(30),
|
||||||
|
UsageMeasure::CacheCreationInputTokensPerMonth => Duration::days(30),
|
||||||
|
UsageMeasure::CacheReadInputTokensPerMonth => Duration::days(30),
|
||||||
UsageMeasure::OutputTokensPerMonth => Duration::days(30),
|
UsageMeasure::OutputTokensPerMonth => Duration::days(30),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,8 @@ pub struct Model {
|
||||||
pub user_id: UserId,
|
pub user_id: UserId,
|
||||||
pub model_id: ModelId,
|
pub model_id: ModelId,
|
||||||
pub input_tokens: i64,
|
pub input_tokens: i64,
|
||||||
|
pub cache_creation_input_tokens: i64,
|
||||||
|
pub cache_read_input_tokens: i64,
|
||||||
pub output_tokens: i64,
|
pub output_tokens: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,8 @@ pub struct Model {
|
||||||
pub max_tokens_per_minute: i64,
|
pub max_tokens_per_minute: i64,
|
||||||
pub max_tokens_per_day: i64,
|
pub max_tokens_per_day: i64,
|
||||||
pub price_per_million_input_tokens: i32,
|
pub price_per_million_input_tokens: i32,
|
||||||
|
pub price_per_million_cache_creation_input_tokens: i32,
|
||||||
|
pub price_per_million_cache_read_input_tokens: i32,
|
||||||
pub price_per_million_output_tokens: i32,
|
pub price_per_million_output_tokens: i32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,8 @@ pub enum UsageMeasure {
|
||||||
TokensPerMinute,
|
TokensPerMinute,
|
||||||
TokensPerDay,
|
TokensPerDay,
|
||||||
InputTokensPerMonth,
|
InputTokensPerMonth,
|
||||||
|
CacheCreationInputTokensPerMonth,
|
||||||
|
CacheReadInputTokensPerMonth,
|
||||||
OutputTokensPerMonth,
|
OutputTokensPerMonth,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,12 +33,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
||||||
let user_id = UserId::from_proto(123);
|
let user_id = UserId::from_proto(123);
|
||||||
|
|
||||||
let now = t0;
|
let now = t0;
|
||||||
db.record_usage(user_id, false, provider, model, 1000, 0, now)
|
db.record_usage(user_id, false, provider, model, 1000, 0, 0, 0, now)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let now = t0 + Duration::seconds(10);
|
let now = t0 + Duration::seconds(10);
|
||||||
db.record_usage(user_id, false, provider, model, 2000, 0, now)
|
db.record_usage(user_id, false, provider, model, 2000, 0, 0, 0, now)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -50,6 +50,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
||||||
tokens_this_minute: 3000,
|
tokens_this_minute: 3000,
|
||||||
tokens_this_day: 3000,
|
tokens_this_day: 3000,
|
||||||
input_tokens_this_month: 3000,
|
input_tokens_this_month: 3000,
|
||||||
|
cache_creation_input_tokens_this_month: 0,
|
||||||
|
cache_read_input_tokens_this_month: 0,
|
||||||
output_tokens_this_month: 0,
|
output_tokens_this_month: 0,
|
||||||
spending_this_month: 0,
|
spending_this_month: 0,
|
||||||
lifetime_spending: 0,
|
lifetime_spending: 0,
|
||||||
|
@ -65,6 +67,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
||||||
tokens_this_minute: 2000,
|
tokens_this_minute: 2000,
|
||||||
tokens_this_day: 3000,
|
tokens_this_day: 3000,
|
||||||
input_tokens_this_month: 3000,
|
input_tokens_this_month: 3000,
|
||||||
|
cache_creation_input_tokens_this_month: 0,
|
||||||
|
cache_read_input_tokens_this_month: 0,
|
||||||
output_tokens_this_month: 0,
|
output_tokens_this_month: 0,
|
||||||
spending_this_month: 0,
|
spending_this_month: 0,
|
||||||
lifetime_spending: 0,
|
lifetime_spending: 0,
|
||||||
|
@ -72,7 +76,7 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
||||||
);
|
);
|
||||||
|
|
||||||
let now = t0 + Duration::seconds(60);
|
let now = t0 + Duration::seconds(60);
|
||||||
db.record_usage(user_id, false, provider, model, 3000, 0, now)
|
db.record_usage(user_id, false, provider, model, 3000, 0, 0, 0, now)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -84,6 +88,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
||||||
tokens_this_minute: 5000,
|
tokens_this_minute: 5000,
|
||||||
tokens_this_day: 6000,
|
tokens_this_day: 6000,
|
||||||
input_tokens_this_month: 6000,
|
input_tokens_this_month: 6000,
|
||||||
|
cache_creation_input_tokens_this_month: 0,
|
||||||
|
cache_read_input_tokens_this_month: 0,
|
||||||
output_tokens_this_month: 0,
|
output_tokens_this_month: 0,
|
||||||
spending_this_month: 0,
|
spending_this_month: 0,
|
||||||
lifetime_spending: 0,
|
lifetime_spending: 0,
|
||||||
|
@ -100,13 +106,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
||||||
tokens_this_minute: 0,
|
tokens_this_minute: 0,
|
||||||
tokens_this_day: 5000,
|
tokens_this_day: 5000,
|
||||||
input_tokens_this_month: 6000,
|
input_tokens_this_month: 6000,
|
||||||
|
cache_creation_input_tokens_this_month: 0,
|
||||||
|
cache_read_input_tokens_this_month: 0,
|
||||||
output_tokens_this_month: 0,
|
output_tokens_this_month: 0,
|
||||||
spending_this_month: 0,
|
spending_this_month: 0,
|
||||||
lifetime_spending: 0,
|
lifetime_spending: 0,
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
db.record_usage(user_id, false, provider, model, 4000, 0, now)
|
db.record_usage(user_id, false, provider, model, 4000, 0, 0, 0, now)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -118,6 +126,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
||||||
tokens_this_minute: 4000,
|
tokens_this_minute: 4000,
|
||||||
tokens_this_day: 9000,
|
tokens_this_day: 9000,
|
||||||
input_tokens_this_month: 10000,
|
input_tokens_this_month: 10000,
|
||||||
|
cache_creation_input_tokens_this_month: 0,
|
||||||
|
cache_read_input_tokens_this_month: 0,
|
||||||
output_tokens_this_month: 0,
|
output_tokens_this_month: 0,
|
||||||
spending_this_month: 0,
|
spending_this_month: 0,
|
||||||
lifetime_spending: 0,
|
lifetime_spending: 0,
|
||||||
|
@ -134,6 +144,50 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
||||||
tokens_this_minute: 0,
|
tokens_this_minute: 0,
|
||||||
tokens_this_day: 0,
|
tokens_this_day: 0,
|
||||||
input_tokens_this_month: 9000,
|
input_tokens_this_month: 9000,
|
||||||
|
cache_creation_input_tokens_this_month: 0,
|
||||||
|
cache_read_input_tokens_this_month: 0,
|
||||||
|
output_tokens_this_month: 0,
|
||||||
|
spending_this_month: 0,
|
||||||
|
lifetime_spending: 0,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
// Test cache creation input tokens
|
||||||
|
db.record_usage(user_id, false, provider, model, 1000, 500, 0, 0, now)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
usage,
|
||||||
|
Usage {
|
||||||
|
requests_this_minute: 1,
|
||||||
|
tokens_this_minute: 1500,
|
||||||
|
tokens_this_day: 1500,
|
||||||
|
input_tokens_this_month: 10000,
|
||||||
|
cache_creation_input_tokens_this_month: 500,
|
||||||
|
cache_read_input_tokens_this_month: 0,
|
||||||
|
output_tokens_this_month: 0,
|
||||||
|
spending_this_month: 0,
|
||||||
|
lifetime_spending: 0,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
// Test cache read input tokens
|
||||||
|
db.record_usage(user_id, false, provider, model, 1000, 0, 300, 0, now)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
usage,
|
||||||
|
Usage {
|
||||||
|
requests_this_minute: 2,
|
||||||
|
tokens_this_minute: 2800,
|
||||||
|
tokens_this_day: 2800,
|
||||||
|
input_tokens_this_month: 11000,
|
||||||
|
cache_creation_input_tokens_this_month: 500,
|
||||||
|
cache_read_input_tokens_this_month: 300,
|
||||||
output_tokens_this_month: 0,
|
output_tokens_this_month: 0,
|
||||||
spending_this_month: 0,
|
spending_this_month: 0,
|
||||||
lifetime_spending: 0,
|
lifetime_spending: 0,
|
||||||
|
|
|
@ -12,11 +12,15 @@ pub struct LlmUsageEventRow {
|
||||||
pub model: String,
|
pub model: String,
|
||||||
pub provider: String,
|
pub provider: String,
|
||||||
pub input_token_count: u64,
|
pub input_token_count: u64,
|
||||||
|
pub cache_creation_input_token_count: u64,
|
||||||
|
pub cache_read_input_token_count: u64,
|
||||||
pub output_token_count: u64,
|
pub output_token_count: u64,
|
||||||
pub requests_this_minute: u64,
|
pub requests_this_minute: u64,
|
||||||
pub tokens_this_minute: u64,
|
pub tokens_this_minute: u64,
|
||||||
pub tokens_this_day: u64,
|
pub tokens_this_day: u64,
|
||||||
pub input_tokens_this_month: u64,
|
pub input_tokens_this_month: u64,
|
||||||
|
pub cache_creation_input_tokens_this_month: u64,
|
||||||
|
pub cache_read_input_tokens_this_month: u64,
|
||||||
pub output_tokens_this_month: u64,
|
pub output_tokens_this_month: u64,
|
||||||
pub spending_this_month: u64,
|
pub spending_this_month: u64,
|
||||||
pub lifetime_spending: u64,
|
pub lifetime_spending: u64,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue