progress on smarter truncation strategy for file context

2023-10-18 17:56:59 -04:00 · 2023-10-18 17:56:59 -04:00 · 178a84bcf6
commit 178a84bcf6
parent 587fd707ba
4 changed files with 124 additions and 37 deletions
--- a/crates/ai/src/models.rs
+++ b/crates/ai/src/models.rs
@ -6,6 +6,7 @@ pub trait LanguageModel {
    fn name(&self) -> String;
    fn count_tokens(&self, content: &str) -> anyhow::Result<usize>;
    fn truncate(&self, content: &str, length: usize) -> anyhow::Result<String>;
+    fn truncate_start(&self, content: &str, length: usize) -> anyhow::Result<String>;
    fn capacity(&self) -> anyhow::Result<usize>;
 }

@ -47,6 +48,18 @@ impl LanguageModel for OpenAILanguageModel {
            Err(anyhow!("bpe for open ai model was not retrieved"))
        }
    }
+    /// Truncates `content` from the start, keeping at most its last `length` tokens.
+    /// Errors if the BPE tokenizer was not retrieved or `bpe.decode` fails.
+    fn truncate_start(&self, content: &str, length: usize) -> anyhow::Result<String> {
+        if let Some(bpe) = &self.bpe {
+            let mut tokens = bpe.encode_with_special_tokens(content);
+            // Drop only the excess leading tokens so that at most `length` remain.
+            tokens.drain(..tokens.len().saturating_sub(length));
+            bpe.decode(tokens)
+        } else {
+            Err(anyhow!("bpe for open ai model was not retrieved"))
+        }
+    }
    fn capacity(&self) -> anyhow::Result<usize> {
        // Context size that `tiktoken_rs` reports for this model's name; the value is always wrapped in `Ok`.
        anyhow::Ok(tiktoken_rs::model::get_context_size(&self.name))
    }