Add support for queuing status updates in cloud language model provider (#29818)
This sets us up to display queue position information to the user, once our language model backend is updated to support request queuing.

The JSON returned by the LLM backend will need to look like this:

```json
{"queue": {"status": "queued", "position": 1}}
{"queue": {"status": "started"}}
{"event": {"THE_UPSTREAM_MODEL_PROVIDER_EVENT": "..."}}
```

Release Notes:

- N/A

---------

Co-authored-by: Marshall Bowers <git@maxdeviant.com>
This commit is contained in:
parent
4d1df7bcd7
commit
04772bf17d
9 changed files with 492 additions and 430 deletions
|
@ -64,9 +64,17 @@ pub struct LanguageModelCacheConfiguration {
|
|||
pub min_total_token: usize,
|
||||
}
|
||||
|
||||
/// Queue status of a language model request.
///
/// Serialized by serde as an internally tagged enum: the variant name is
/// written in snake_case under the `status` key, alongside the variant's own
/// fields, e.g. `{"status": "queued", "position": 1}` or
/// `{"status": "started"}`.
#[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize)]
|
||||
#[serde(tag = "status", rename_all = "snake_case")]
|
||||
pub enum QueueState {
|
||||
/// The request is queued; `position` is its place in the queue.
/// NOTE(review): whether `position` is 0- or 1-based is not visible here — confirm with the backend.
Queued { position: usize },
|
||||
/// Presumably the request has left the queue and processing has begun — confirm against the backend.
Started,
|
||||
}
|
||||
|
||||
/// A completion event from a language model.
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
|
||||
pub enum LanguageModelCompletionEvent {
|
||||
QueueUpdate(QueueState),
|
||||
Stop(StopReason),
|
||||
Text(String),
|
||||
Thinking {
|
||||
|
@ -349,6 +357,7 @@ pub trait LanguageModel: Send + Sync {
|
|||
let last_token_usage = last_token_usage.clone();
|
||||
async move {
|
||||
match result {
|
||||
Ok(LanguageModelCompletionEvent::QueueUpdate { .. }) => None,
|
||||
Ok(LanguageModelCompletionEvent::StartMessage { .. }) => None,
|
||||
Ok(LanguageModelCompletionEvent::Text(text)) => Some(Ok(text)),
|
||||
Ok(LanguageModelCompletionEvent::Thinking { .. }) => None,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue