Move truncation to the parsing step, leveraging the EmbeddingProvider trait

This commit is contained in:
KCaverly 2023-08-30 12:13:26 -04:00
parent 76caea80f7
commit 9781047156
3 changed files with 45 additions and 41 deletions

View file

@@ -73,6 +73,7 @@ impl CodeContextRetriever {
sha1.update(&document_span);
let token_count = self.embedding_provider.count_tokens(&document_span);
let document_span = self.embedding_provider.truncate(&document_span);
Ok(vec![Document {
range: 0..content.len(),
@@ -93,6 +94,7 @@ impl CodeContextRetriever {
sha1.update(&document_span);
let token_count = self.embedding_provider.count_tokens(&document_span);
let document_span = self.embedding_provider.truncate(&document_span);
Ok(vec![Document {
range: 0..content.len(),
@@ -182,6 +184,8 @@ impl CodeContextRetriever {
.replace("item", &document.content);
let token_count = self.embedding_provider.count_tokens(&document_content);
let document_content = self.embedding_provider.truncate(&document_content);
document.content = document_content;
document.token_count = token_count;
}