Implement simpler logic for edit predictions prompt byte limits (#23983)
Realized that the logic in #23814 was more elaborate than needed, and harder to maintain. An approach like that could make sense if we were using the tokenizer and wanted to hit a token limit precisely. For edit predictions, however, the limit is a latency-and-expense vs. capability tradeoff, so that level of precision is unnecessary. Happily, this change didn't require much extra work — copy-modifying parts of that earlier change was sufficient. Release Notes: - N/A
This commit is contained in:
parent
9d6c0e57a0
commit
87b0f62041
2 changed files with 156 additions and 240 deletions
|
@ -42,6 +42,11 @@ use util::ResultExt;
|
|||
|
||||
pub use token::*;
|
||||
|
||||
const ACTIVE_USER_COUNT_CACHE_DURATION: Duration = Duration::seconds(30);
|
||||
|
||||
/// Output token limit. A copy of this constant is also in `crates/zeta/src/zeta.rs`.
|
||||
const MAX_OUTPUT_TOKENS: u32 = 2048;
|
||||
|
||||
pub struct LlmState {
|
||||
pub config: Config,
|
||||
pub executor: Executor,
|
||||
|
@ -52,8 +57,6 @@ pub struct LlmState {
|
|||
RwLock<HashMap<(LanguageModelProvider, String), (DateTime<Utc>, ActiveUserCount)>>,
|
||||
}
|
||||
|
||||
const ACTIVE_USER_COUNT_CACHE_DURATION: Duration = Duration::seconds(30);
|
||||
|
||||
impl LlmState {
|
||||
pub async fn new(config: Config, executor: Executor) -> Result<Arc<Self>> {
|
||||
let database_url = config
|
||||
|
@ -488,7 +491,7 @@ async fn predict_edits(
|
|||
fireworks::CompletionRequest {
|
||||
model: model.to_string(),
|
||||
prompt: prompt.clone(),
|
||||
max_tokens: 2048,
|
||||
max_tokens: MAX_OUTPUT_TOKENS,
|
||||
temperature: 0.,
|
||||
prediction: Some(fireworks::Prediction::Content {
|
||||
content: params.input_excerpt.clone(),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue