bedrock: Fix cross-region inference (#30659)

Closes #30535

Release Notes:

- AWS Bedrock: Added support for Meta Llama 4 Scout and Maverick models.
- AWS Bedrock: Fixed cross-region inference for all regions.
- AWS Bedrock: Updated all models available through cross-region
inference.

---------

Co-authored-by: Marshall Bowers <git@maxdeviant.com>
This commit is contained in:
Shardul Vaidya 2025-06-03 11:46:35 -04:00 committed by GitHub
parent c0397727e0
commit e13b494c9e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 105 additions and 84 deletions

View file

@ -71,16 +71,20 @@ pub enum Model {
// DeepSeek // DeepSeek
DeepSeekR1, DeepSeekR1,
// Meta models // Meta models
MetaLlama38BInstructV1, MetaLlama3_8BInstruct,
MetaLlama370BInstructV1, MetaLlama3_70BInstruct,
MetaLlama318BInstructV1_128k, MetaLlama31_8BInstruct,
MetaLlama318BInstructV1, MetaLlama31_70BInstruct,
MetaLlama3170BInstructV1_128k, MetaLlama31_405BInstruct,
MetaLlama3170BInstructV1, MetaLlama32_1BInstruct,
MetaLlama3211BInstructV1, MetaLlama32_3BInstruct,
MetaLlama3290BInstructV1, MetaLlama32_11BMultiModal,
MetaLlama321BInstructV1, MetaLlama32_90BMultiModal,
MetaLlama323BInstructV1, MetaLlama33_70BInstruct,
#[allow(non_camel_case_types)]
MetaLlama4Scout_17BInstruct,
#[allow(non_camel_case_types)]
MetaLlama4Maverick_17BInstruct,
// Mistral models // Mistral models
MistralMistral7BInstructV0, MistralMistral7BInstructV0,
MistralMixtral8x7BInstructV0, MistralMixtral8x7BInstructV0,
@ -145,7 +149,7 @@ impl Model {
Model::AmazonNovaMicro => "amazon.nova-micro-v1:0", Model::AmazonNovaMicro => "amazon.nova-micro-v1:0",
Model::AmazonNovaPro => "amazon.nova-pro-v1:0", Model::AmazonNovaPro => "amazon.nova-pro-v1:0",
Model::AmazonNovaPremier => "amazon.nova-premier-v1:0", Model::AmazonNovaPremier => "amazon.nova-premier-v1:0",
Model::DeepSeekR1 => "us.deepseek.r1-v1:0", Model::DeepSeekR1 => "deepseek.r1-v1:0",
Model::AI21J2GrandeInstruct => "ai21.j2-grande-instruct", Model::AI21J2GrandeInstruct => "ai21.j2-grande-instruct",
Model::AI21J2JumboInstruct => "ai21.j2-jumbo-instruct", Model::AI21J2JumboInstruct => "ai21.j2-jumbo-instruct",
Model::AI21J2Mid => "ai21.j2-mid", Model::AI21J2Mid => "ai21.j2-mid",
@ -160,16 +164,18 @@ impl Model {
Model::CohereCommandRV1 => "cohere.command-r-v1:0", Model::CohereCommandRV1 => "cohere.command-r-v1:0",
Model::CohereCommandRPlusV1 => "cohere.command-r-plus-v1:0", Model::CohereCommandRPlusV1 => "cohere.command-r-plus-v1:0",
Model::CohereCommandLightTextV14_4k => "cohere.command-light-text-v14:7:4k", Model::CohereCommandLightTextV14_4k => "cohere.command-light-text-v14:7:4k",
Model::MetaLlama38BInstructV1 => "meta.llama3-8b-instruct-v1:0", Model::MetaLlama3_8BInstruct => "meta.llama3-8b-instruct-v1:0",
Model::MetaLlama370BInstructV1 => "meta.llama3-70b-instruct-v1:0", Model::MetaLlama3_70BInstruct => "meta.llama3-70b-instruct-v1:0",
Model::MetaLlama318BInstructV1_128k => "meta.llama3-1-8b-instruct-v1:0:128k", Model::MetaLlama31_8BInstruct => "meta.llama3-1-8b-instruct-v1:0",
Model::MetaLlama318BInstructV1 => "meta.llama3-1-8b-instruct-v1:0", Model::MetaLlama31_70BInstruct => "meta.llama3-1-70b-instruct-v1:0",
Model::MetaLlama3170BInstructV1_128k => "meta.llama3-1-70b-instruct-v1:0:128k", Model::MetaLlama31_405BInstruct => "meta.llama3-1-405b-instruct-v1:0",
Model::MetaLlama3170BInstructV1 => "meta.llama3-1-70b-instruct-v1:0", Model::MetaLlama32_11BMultiModal => "meta.llama3-2-11b-instruct-v1:0",
Model::MetaLlama3211BInstructV1 => "meta.llama3-2-11b-instruct-v1:0", Model::MetaLlama32_90BMultiModal => "meta.llama3-2-90b-instruct-v1:0",
Model::MetaLlama3290BInstructV1 => "meta.llama3-2-90b-instruct-v1:0", Model::MetaLlama32_1BInstruct => "meta.llama3-2-1b-instruct-v1:0",
Model::MetaLlama321BInstructV1 => "meta.llama3-2-1b-instruct-v1:0", Model::MetaLlama32_3BInstruct => "meta.llama3-2-3b-instruct-v1:0",
Model::MetaLlama323BInstructV1 => "meta.llama3-2-3b-instruct-v1:0", Model::MetaLlama33_70BInstruct => "meta.llama3-3-70b-instruct-v1:0",
Model::MetaLlama4Scout_17BInstruct => "meta.llama4-scout-17b-instruct-v1:0",
Model::MetaLlama4Maverick_17BInstruct => "meta.llama4-maverick-17b-instruct-v1:0",
Model::MistralMistral7BInstructV0 => "mistral.mistral-7b-instruct-v0:2", Model::MistralMistral7BInstructV0 => "mistral.mistral-7b-instruct-v0:2",
Model::MistralMixtral8x7BInstructV0 => "mistral.mixtral-8x7b-instruct-v0:1", Model::MistralMixtral8x7BInstructV0 => "mistral.mixtral-8x7b-instruct-v0:1",
Model::MistralMistralLarge2402V1 => "mistral.mistral-large-2402-v1:0", Model::MistralMistralLarge2402V1 => "mistral.mistral-large-2402-v1:0",
@ -214,16 +220,18 @@ impl Model {
Self::CohereCommandRV1 => "Cohere Command R V1", Self::CohereCommandRV1 => "Cohere Command R V1",
Self::CohereCommandRPlusV1 => "Cohere Command R Plus V1", Self::CohereCommandRPlusV1 => "Cohere Command R Plus V1",
Self::CohereCommandLightTextV14_4k => "Cohere Command Light Text V14 4K", Self::CohereCommandLightTextV14_4k => "Cohere Command Light Text V14 4K",
Self::MetaLlama38BInstructV1 => "Meta Llama 3 8B Instruct V1", Self::MetaLlama3_8BInstruct => "Meta Llama 3 8B Instruct",
Self::MetaLlama370BInstructV1 => "Meta Llama 3 70B Instruct V1", Self::MetaLlama3_70BInstruct => "Meta Llama 3 70B Instruct",
Self::MetaLlama318BInstructV1_128k => "Meta Llama 3 1.8B Instruct V1 128K", Self::MetaLlama31_8BInstruct => "Meta Llama 3.1 8B Instruct",
Self::MetaLlama318BInstructV1 => "Meta Llama 3 1.8B Instruct V1", Self::MetaLlama31_70BInstruct => "Meta Llama 3.1 70B Instruct",
Self::MetaLlama3170BInstructV1_128k => "Meta Llama 3 1 70B Instruct V1 128K", Self::MetaLlama31_405BInstruct => "Meta Llama 3.1 405B Instruct",
Self::MetaLlama3170BInstructV1 => "Meta Llama 3 1 70B Instruct V1", Self::MetaLlama32_11BMultiModal => "Meta Llama 3.2 11B Vision Instruct",
Self::MetaLlama3211BInstructV1 => "Meta Llama 3 2 11B Instruct V1", Self::MetaLlama32_90BMultiModal => "Meta Llama 3.2 90B Vision Instruct",
Self::MetaLlama3290BInstructV1 => "Meta Llama 3 2 90B Instruct V1", Self::MetaLlama32_1BInstruct => "Meta Llama 3.2 1B Instruct",
Self::MetaLlama321BInstructV1 => "Meta Llama 3 2 1B Instruct V1", Self::MetaLlama32_3BInstruct => "Meta Llama 3.2 3B Instruct",
Self::MetaLlama323BInstructV1 => "Meta Llama 3 2 3B Instruct V1", Self::MetaLlama33_70BInstruct => "Meta Llama 3.3 70B Instruct",
Self::MetaLlama4Scout_17BInstruct => "Meta Llama 4 Scout 17B Instruct",
Self::MetaLlama4Maverick_17BInstruct => "Meta Llama 4 Maverick 17B Instruct",
Self::MistralMistral7BInstructV0 => "Mistral 7B Instruct V0", Self::MistralMistral7BInstructV0 => "Mistral 7B Instruct V0",
Self::MistralMixtral8x7BInstructV0 => "Mistral Mixtral 8x7B Instruct V0", Self::MistralMixtral8x7BInstructV0 => "Mistral Mixtral 8x7B Instruct V0",
Self::MistralMistralLarge2402V1 => "Mistral Large 2402 V1", Self::MistralMistralLarge2402V1 => "Mistral Large 2402 V1",
@ -365,55 +373,60 @@ impl Model {
Ok(format!("{}.{}", region_group, model_id)) Ok(format!("{}.{}", region_group, model_id))
} }
// Models available only in US // Available everywhere
(Model::Claude3Opus, "us") (Model::AmazonNovaLite | Model::AmazonNovaMicro | Model::AmazonNovaPro, _) => {
| (Model::Claude3_5Haiku, "us")
| (Model::Claude3_7Sonnet, "us")
| (Model::ClaudeSonnet4, "us")
| (Model::ClaudeOpus4, "us")
| (Model::ClaudeSonnet4Thinking, "us")
| (Model::ClaudeOpus4Thinking, "us")
| (Model::Claude3_7SonnetThinking, "us")
| (Model::AmazonNovaPremier, "us")
| (Model::MistralPixtralLarge2502V1, "us") => {
Ok(format!("{}.{}", region_group, model_id)) Ok(format!("{}.{}", region_group, model_id))
} }
// Models available in US, EU, and APAC // Models in US
(Model::Claude3_5SonnetV2, "us") (
| (Model::Claude3_5SonnetV2, "apac") Model::AmazonNovaPremier
| (Model::Claude3_5Sonnet, _) | Model::Claude3_5Haiku
| (Model::Claude3Haiku, _) | Model::Claude3_5Sonnet
| (Model::Claude3Sonnet, _) | Model::Claude3_5SonnetV2
| (Model::AmazonNovaLite, _) | Model::Claude3_7Sonnet
| (Model::AmazonNovaMicro, _) | Model::Claude3_7SonnetThinking
| (Model::AmazonNovaPro, _) => Ok(format!("{}.{}", region_group, model_id)), | Model::Claude3Haiku
| Model::Claude3Opus
| Model::Claude3Sonnet
| Model::DeepSeekR1
| Model::MetaLlama31_405BInstruct
| Model::MetaLlama31_70BInstruct
| Model::MetaLlama31_8BInstruct
| Model::MetaLlama32_11BMultiModal
| Model::MetaLlama32_1BInstruct
| Model::MetaLlama32_3BInstruct
| Model::MetaLlama32_90BMultiModal
| Model::MetaLlama33_70BInstruct
| Model::MetaLlama4Maverick_17BInstruct
| Model::MetaLlama4Scout_17BInstruct
| Model::MistralPixtralLarge2502V1
| Model::PalmyraWriterX4
| Model::PalmyraWriterX5,
"us",
) => Ok(format!("{}.{}", region_group, model_id)),
// Models with limited EU availability // Models available in EU
(Model::MetaLlama321BInstructV1, "us") (
| (Model::MetaLlama321BInstructV1, "eu") Model::Claude3_5Sonnet
| (Model::MetaLlama323BInstructV1, "us") | Model::Claude3_7Sonnet
| (Model::MetaLlama323BInstructV1, "eu") => { | Model::Claude3_7SonnetThinking
Ok(format!("{}.{}", region_group, model_id)) | Model::Claude3Haiku
} | Model::Claude3Sonnet
| Model::MetaLlama32_1BInstruct
| Model::MetaLlama32_3BInstruct
| Model::MistralPixtralLarge2502V1,
"eu",
) => Ok(format!("{}.{}", region_group, model_id)),
// US-only models (all remaining Meta models) // Models available in APAC
(Model::MetaLlama38BInstructV1, "us") (
| (Model::MetaLlama370BInstructV1, "us") Model::Claude3_5Sonnet
| (Model::MetaLlama318BInstructV1, "us") | Model::Claude3_5SonnetV2
| (Model::MetaLlama318BInstructV1_128k, "us") | Model::Claude3Haiku
| (Model::MetaLlama3170BInstructV1, "us") | Model::Claude3Sonnet,
| (Model::MetaLlama3170BInstructV1_128k, "us") "apac",
| (Model::MetaLlama3211BInstructV1, "us") ) => Ok(format!("{}.{}", region_group, model_id)),
| (Model::MetaLlama3290BInstructV1, "us") => {
Ok(format!("{}.{}", region_group, model_id))
}
// Writer models only available in the US
(Model::PalmyraWriterX4, "us") | (Model::PalmyraWriterX5, "us") => {
// They have some goofiness
Ok(format!("{}.{}", region_group, model_id))
}
// Any other combination is not supported // Any other combination is not supported
_ => Ok(self.id().into()), _ => Ok(self.id().into()),
@ -464,6 +477,10 @@ mod tests {
Model::Claude3_5SonnetV2.cross_region_inference_id("ap-northeast-1")?, Model::Claude3_5SonnetV2.cross_region_inference_id("ap-northeast-1")?,
"apac.anthropic.claude-3-5-sonnet-20241022-v2:0" "apac.anthropic.claude-3-5-sonnet-20241022-v2:0"
); );
assert_eq!(
Model::Claude3_5SonnetV2.cross_region_inference_id("ap-southeast-2")?,
"apac.anthropic.claude-3-5-sonnet-20241022-v2:0"
);
assert_eq!( assert_eq!(
Model::AmazonNovaLite.cross_region_inference_id("ap-south-1")?, Model::AmazonNovaLite.cross_region_inference_id("ap-south-1")?,
"apac.amazon.nova-lite-v1:0" "apac.amazon.nova-lite-v1:0"
@ -489,11 +506,15 @@ mod tests {
fn test_meta_models_inference_ids() -> anyhow::Result<()> { fn test_meta_models_inference_ids() -> anyhow::Result<()> {
// Test Meta models // Test Meta models
assert_eq!( assert_eq!(
Model::MetaLlama370BInstructV1.cross_region_inference_id("us-east-1")?, Model::MetaLlama3_70BInstruct.cross_region_inference_id("us-east-1")?,
"us.meta.llama3-70b-instruct-v1:0" "meta.llama3-70b-instruct-v1:0"
); );
assert_eq!( assert_eq!(
Model::MetaLlama321BInstructV1.cross_region_inference_id("eu-west-1")?, Model::MetaLlama31_70BInstruct.cross_region_inference_id("us-east-1")?,
"us.meta.llama3-1-70b-instruct-v1:0"
);
assert_eq!(
Model::MetaLlama32_1BInstruct.cross_region_inference_id("eu-west-1")?,
"eu.meta.llama3-2-1b-instruct-v1:0" "eu.meta.llama3-2-1b-instruct-v1:0"
); );
Ok(()) Ok(())

View file

@ -531,13 +531,13 @@ impl LanguageModel for BedrockModel {
> { > {
let Ok(region) = cx.read_entity(&self.state, |state, _cx| { let Ok(region) = cx.read_entity(&self.state, |state, _cx| {
// Get region - from credentials or directly from settings // Get region - from credentials or directly from settings
let region = state let credentials_region = state.credentials.as_ref().map(|s| s.region.clone());
.credentials let settings_region = state.settings.as_ref().and_then(|s| s.region.clone());
.as_ref()
.map(|s| s.region.clone())
.unwrap_or(String::from("us-east-1"));
region // Use credentials region if available, otherwise use settings region, finally fall back to default
credentials_region
.or(settings_region)
.unwrap_or(String::from("us-east-1"))
}) else { }) else {
return async move { return async move {
anyhow::bail!("App State Dropped"); anyhow::bail!("App State Dropped");