progress on smarter truncation strategy for file context
This commit is contained in:
parent
587fd707ba
commit
178a84bcf6
4 changed files with 124 additions and 37 deletions
|
@ -6,6 +6,7 @@ pub trait LanguageModel {
|
|||
fn name(&self) -> String;
|
||||
fn count_tokens(&self, content: &str) -> anyhow::Result<usize>;
|
||||
fn truncate(&self, content: &str, length: usize) -> anyhow::Result<String>;
|
||||
fn truncate_start(&self, content: &str, length: usize) -> anyhow::Result<String>;
|
||||
fn capacity(&self) -> anyhow::Result<usize>;
|
||||
}
|
||||
|
||||
|
@ -47,6 +48,18 @@ impl LanguageModel for OpenAILanguageModel {
|
|||
Err(anyhow!("bpe for open ai model was not retrieved"))
|
||||
}
|
||||
}
|
||||
fn truncate_start(&self, content: &str, length: usize) -> anyhow::Result<String> {
|
||||
if let Some(bpe) = &self.bpe {
|
||||
let tokens = bpe.encode_with_special_tokens(content);
|
||||
if tokens.len() > length {
|
||||
bpe.decode(tokens[length..].to_vec())
|
||||
} else {
|
||||
bpe.decode(tokens)
|
||||
}
|
||||
} else {
|
||||
Err(anyhow!("bpe for open ai model was not retrieved"))
|
||||
}
|
||||
}
|
||||
fn capacity(&self) -> anyhow::Result<usize> {
|
||||
anyhow::Ok(tiktoken_rs::model::get_context_size(&self.name))
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue