From 1ab49fdbe6bd6940d8e4ee54e613875be627aaf0 Mon Sep 17 00:00:00 2001
From: Todsaporn Banjerdkit
Date: Tue, 30 Jan 2024 10:42:03 +0700
Subject: [PATCH] Use fallback BPE if the language model doesn't have one (#6848)

Release Notes:

- Added a fallback BPE if the language model doesn't have one.

---------

Co-authored-by: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com>
Co-authored-by: Marshall Bowers
---
 crates/ai/src/providers/open_ai/embedding.rs | 2 +-
 crates/ai/src/providers/open_ai/model.rs     | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/crates/ai/src/providers/open_ai/embedding.rs b/crates/ai/src/providers/open_ai/embedding.rs
index 7480a454a1..4a8b051df3 100644
--- a/crates/ai/src/providers/open_ai/embedding.rs
+++ b/crates/ai/src/providers/open_ai/embedding.rs
@@ -30,7 +30,7 @@ use crate::providers::open_ai::OpenAiLanguageModel;
 use crate::providers::open_ai::OPEN_AI_API_URL;
 
 lazy_static! {
-    static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
+    pub(crate) static ref OPEN_AI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap();
 }
 
 #[derive(Clone)]
diff --git a/crates/ai/src/providers/open_ai/model.rs b/crates/ai/src/providers/open_ai/model.rs
index ba3488d7dd..21ea0334bd 100644
--- a/crates/ai/src/providers/open_ai/model.rs
+++ b/crates/ai/src/providers/open_ai/model.rs
@@ -1,9 +1,10 @@
 use anyhow::anyhow;
 use tiktoken_rs::CoreBPE;
-use util::ResultExt;
 
 use crate::models::{LanguageModel, TruncationDirection};
 
+use super::OPEN_AI_BPE_TOKENIZER;
+
 #[derive(Clone)]
 pub struct OpenAiLanguageModel {
     name: String,
@@ -12,10 +13,11 @@ pub struct OpenAiLanguageModel {
 
 impl OpenAiLanguageModel {
     pub fn load(model_name: &str) -> Self {
-        let bpe = tiktoken_rs::get_bpe_from_model(model_name).log_err();
+        let bpe =
+            tiktoken_rs::get_bpe_from_model(model_name).unwrap_or(OPEN_AI_BPE_TOKENIZER.to_owned());
         OpenAiLanguageModel {
             name: model_name.to_string(),
-            bpe,
+            bpe: Some(bpe),
         }
     }
 }