progress on smarter truncation strategy for file context

This commit is contained in:
KCaverly 2023-10-18 17:56:59 -04:00
parent 587fd707ba
commit 178a84bcf6
4 changed files with 124 additions and 37 deletions

View file

@ -6,6 +6,7 @@ pub trait LanguageModel {
fn name(&self) -> String;
fn count_tokens(&self, content: &str) -> anyhow::Result<usize>;
fn truncate(&self, content: &str, length: usize) -> anyhow::Result<String>;
fn truncate_start(&self, content: &str, length: usize) -> anyhow::Result<String>;
fn capacity(&self) -> anyhow::Result<usize>;
}
@ -47,6 +48,18 @@ impl LanguageModel for OpenAILanguageModel {
Err(anyhow!("bpe for open ai model was not retrieved"))
}
}
fn truncate_start(&self, content: &str, length: usize) -> anyhow::Result<String> {
if let Some(bpe) = &self.bpe {
let tokens = bpe.encode_with_special_tokens(content);
if tokens.len() > length {
bpe.decode(tokens[length..].to_vec())
} else {
bpe.decode(tokens)
}
} else {
Err(anyhow!("bpe for open ai model was not retrieved"))
}
}
fn capacity(&self) -> anyhow::Result<usize> {
anyhow::Ok(tiktoken_rs::model::get_context_size(&self.name))
}