bedrock: Add support for tool use, cross-region inference, and Claude 3.7 Thinking (#28137)

Closes #27223
Merges: #27996, #26734, #27949 

Release Notes:

- AWS Bedrock: Added advanced authentication strategies with:
  - Short-lived credentials with session tokens
  - AWS Named Profile
  - EC2 Identity, Pod Identity, Web Identity
- AWS Bedrock: Added Claude 3.7 Thinking support.
- AWS Bedrock: Added Cross-Region Inference for every region and model
combination where it is available.
- Agent Beta: Added support for AWS Bedrock.

---------

Co-authored-by: Marshall Bowers <git@maxdeviant.com>
This commit is contained in:
Shardul Vaidya 2025-04-05 11:16:26 -04:00 committed by GitHub
parent ea0f5144c9
commit 525755c28e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 1042 additions and 318 deletions

View file

@ -1,21 +1,25 @@
mod models;
use std::collections::HashMap;
use std::pin::Pin;
use anyhow::{Context, Error, Result, anyhow};
use anyhow::{Error, Result, anyhow};
use aws_sdk_bedrockruntime as bedrock;
pub use aws_sdk_bedrockruntime as bedrock_client;
pub use aws_sdk_bedrockruntime::types::{
ContentBlock as BedrockInnerContent, SpecificToolChoice as BedrockSpecificTool,
ToolChoice as BedrockToolChoice, ToolInputSchema as BedrockToolInputSchema,
ToolSpecification as BedrockTool,
AutoToolChoice as BedrockAutoToolChoice, ContentBlock as BedrockInnerContent,
Tool as BedrockTool, ToolChoice as BedrockToolChoice, ToolConfiguration as BedrockToolConfig,
ToolInputSchema as BedrockToolInputSchema, ToolSpecification as BedrockToolSpec,
};
use aws_smithy_types::{Document, Number as AwsNumber};
pub use bedrock::operation::converse_stream::ConverseStreamInput as BedrockStreamingRequest;
pub use bedrock::types::{
ContentBlock as BedrockRequestContent, ConversationRole as BedrockRole,
ConverseOutput as BedrockResponse, ConverseStreamOutput as BedrockStreamingResponse,
Message as BedrockMessage, ResponseStream as BedrockResponseStream,
ImageBlock as BedrockImageBlock, Message as BedrockMessage,
ResponseStream as BedrockResponseStream, ToolResultBlock as BedrockToolResultBlock,
ToolResultContentBlock as BedrockToolResultContentBlock,
ToolResultStatus as BedrockToolResultStatus, ToolUseBlock as BedrockToolUseBlock,
};
use futures::stream::{self, BoxStream, Stream};
use serde::{Deserialize, Serialize};
@ -24,25 +28,6 @@ use thiserror::Error;
pub use crate::models::*;
/// Sends a single, non-streaming Converse request to Bedrock.
///
/// Only `request.model` (as the model ID) and `request.messages` are
/// forwarded; no other `Request` field is read here.
///
/// # Errors
///
/// Returns `BedrockError::Other` when sending the request fails (wrapped with
/// the context "failed to send request to Bedrock") or when the response
/// carries no output.
pub async fn complete(
client: &bedrock::Client,
request: Request,
) -> Result<BedrockResponse, BedrockError> {
let response = bedrock::Client::converse(client)
.model_id(request.model.clone())
.set_messages(request.messages.into())
.send()
.await
.context("failed to send request to Bedrock");
match response {
// A successful call may still come back without an output payload.
Ok(output) => output
.output
.ok_or_else(|| BedrockError::Other(anyhow!("no output"))),
Err(err) => Err(BedrockError::Other(err)),
}
}
pub async fn stream_completion(
client: bedrock::Client,
request: Request,
@ -50,11 +35,32 @@ pub async fn stream_completion(
) -> Result<BoxStream<'static, Result<BedrockStreamingResponse, BedrockError>>, Error> {
handle
.spawn(async move {
let response = bedrock::Client::converse_stream(&client)
let mut response = bedrock::Client::converse_stream(&client)
.model_id(request.model.clone())
.set_messages(request.messages.into())
.send()
.await;
.set_messages(request.messages.into());
if let Some(Thinking::Enabled {
budget_tokens: Some(budget_tokens),
}) = request.thinking
{
response =
response.additional_model_request_fields(Document::Object(HashMap::from([(
"thinking".to_string(),
Document::from(HashMap::from([
("type".to_string(), Document::String("enabled".to_string())),
(
"budget_tokens".to_string(),
Document::Number(AwsNumber::PosInt(budget_tokens)),
),
])),
)])));
}
if request.tools.is_some() && !request.tools.as_ref().unwrap().tools.is_empty() {
response = response.set_tool_config(request.tools);
}
let response = response.send().await;
match response {
Ok(output) => {
@ -65,7 +71,7 @@ pub async fn stream_completion(
>,
> = Box::pin(stream::unfold(output.stream, |mut stream| async move {
match stream.recv().await {
Ok(Some(output)) => Some((Ok(output), stream)),
Ok(Some(output)) => Some(({ Ok(output) }, stream)),
Ok(None) => None,
Err(err) => {
Some((
@ -135,13 +141,18 @@ pub fn value_to_aws_document(value: &Value) -> Document {
}
}
/// Extended-thinking configuration carried on a [`Request`].
///
/// `stream_completion` only attaches a `"thinking"` entry to the additional
/// model request fields when this is `Enabled` with a concrete budget.
#[derive(Debug, Serialize, Deserialize)]
pub enum Thinking {
/// Thinking is requested; `budget_tokens` is sent as the `"budget_tokens"`
/// value when it is `Some`.
Enabled { budget_tokens: Option<u64> },
}
#[derive(Debug)]
pub struct Request {
pub model: String,
pub max_tokens: u32,
pub messages: Vec<BedrockMessage>,
pub tools: Vec<BedrockTool>,
pub tool_choice: Option<BedrockToolChoice>,
pub tools: Option<BedrockToolConfig>,
pub thinking: Option<Thinking>,
pub system: Option<String>,
pub metadata: Option<Metadata>,
pub stop_sequences: Vec<String>,

View file

@ -2,21 +2,38 @@ use anyhow::anyhow;
use serde::{Deserialize, Serialize};
use strum::EnumIter;
/// Operating mode of a Bedrock model, as reported by `Model::mode`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum BedrockModelMode {
/// Regular mode; this is the `Default::default()` value.
#[default]
Default,
/// Thinking mode with an optional token budget.
Thinking {
budget_tokens: Option<u64>,
},
}
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
// Anthropic models (already included)
#[default]
#[serde(rename = "claude-3-5-sonnet-v2", alias = "claude-3-5-sonnet-latest")]
Claude3_5Sonnet,
Claude3_5SonnetV2,
#[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
Claude3_7Sonnet,
#[serde(
rename = "claude-3-7-sonnet-thinking",
alias = "claude-3-7-sonnet-thinking-latest"
)]
Claude3_7SonnetThinking,
#[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
Claude3Opus,
#[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
Claude3Sonnet,
#[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
Claude3_5Haiku,
Claude3_5Sonnet,
Claude3Haiku,
// Amazon Nova Models
AmazonNovaLite,
AmazonNovaMicro,
@ -69,7 +86,7 @@ pub enum Model {
impl Model {
pub fn from_id(id: &str) -> anyhow::Result<Self> {
if id.starts_with("claude-3-5-sonnet-v2") {
Ok(Self::Claude3_5Sonnet)
Ok(Self::Claude3_5SonnetV2)
} else if id.starts_with("claude-3-opus") {
Ok(Self::Claude3Opus)
} else if id.starts_with("claude-3-sonnet") {
@ -78,6 +95,8 @@ impl Model {
Ok(Self::Claude3_5Haiku)
} else if id.starts_with("claude-3-7-sonnet") {
Ok(Self::Claude3_7Sonnet)
} else if id.starts_with("claude-3-7-sonnet-thinking") {
Ok(Self::Claude3_7SonnetThinking)
} else {
Err(anyhow!("invalid model id"))
}
@ -85,14 +104,18 @@ impl Model {
pub fn id(&self) -> &str {
match self {
Model::Claude3_5Sonnet => "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
Model::Claude3Opus => "us.anthropic.claude-3-opus-20240229-v1:0",
Model::Claude3Sonnet => "us.anthropic.claude-3-sonnet-20240229-v1:0",
Model::Claude3_5Haiku => "us.anthropic.claude-3-5-haiku-20241022-v1:0",
Model::Claude3_7Sonnet => "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
Model::AmazonNovaLite => "us.amazon.nova-lite-v1:0",
Model::AmazonNovaMicro => "us.amazon.nova-micro-v1:0",
Model::AmazonNovaPro => "us.amazon.nova-pro-v1:0",
Model::Claude3_5SonnetV2 => "anthropic.claude-3-5-sonnet-20241022-v2:0",
Model::Claude3_5Sonnet => "anthropic.claude-3-5-sonnet-20240620-v1:0",
Model::Claude3Opus => "anthropic.claude-3-opus-20240229-v1:0",
Model::Claude3Sonnet => "anthropic.claude-3-sonnet-20240229-v1:0",
Model::Claude3Haiku => "anthropic.claude-3-haiku-20240307-v1:0",
Model::Claude3_5Haiku => "anthropic.claude-3-5-haiku-20241022-v1:0",
Model::Claude3_7Sonnet | Model::Claude3_7SonnetThinking => {
"anthropic.claude-3-7-sonnet-20250219-v1:0"
}
Model::AmazonNovaLite => "amazon.nova-lite-v1:0",
Model::AmazonNovaMicro => "amazon.nova-micro-v1:0",
Model::AmazonNovaPro => "amazon.nova-pro-v1:0",
Model::DeepSeekR1 => "us.deepseek.r1-v1:0",
Model::AI21J2GrandeInstruct => "ai21.j2-grande-instruct",
Model::AI21J2JumboInstruct => "ai21.j2-jumbo-instruct",
@ -128,11 +151,14 @@ impl Model {
pub fn display_name(&self) -> &str {
match self {
Self::Claude3_5Sonnet => "Claude 3.5 Sonnet v2",
Self::Claude3_5SonnetV2 => "Claude 3.5 Sonnet v2",
Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
Self::Claude3Opus => "Claude 3 Opus",
Self::Claude3Sonnet => "Claude 3 Sonnet",
Self::Claude3Haiku => "Claude 3 Haiku",
Self::Claude3_5Haiku => "Claude 3.5 Haiku",
Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
Self::AmazonNovaLite => "Amazon Nova Lite",
Self::AmazonNovaMicro => "Amazon Nova Micro",
Self::AmazonNovaPro => "Amazon Nova Pro",
@ -173,7 +199,7 @@ impl Model {
pub fn max_token_count(&self) -> usize {
match self {
Self::Claude3_5Sonnet
Self::Claude3_5SonnetV2
| Self::Claude3Opus
| Self::Claude3Sonnet
| Self::Claude3_5Haiku
@ -186,7 +212,8 @@ impl Model {
pub fn max_output_tokens(&self) -> u32 {
match self {
Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3_5Haiku => 4_096,
Self::Claude3_5Sonnet => 8_192,
Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => 128_000,
Self::Claude3_5SonnetV2 => 8_192,
Self::Custom {
max_output_tokens, ..
} => max_output_tokens.unwrap_or(4_096),
@ -196,7 +223,7 @@ impl Model {
pub fn default_temperature(&self) -> f32 {
match self {
Self::Claude3_5Sonnet
Self::Claude3_5SonnetV2
| Self::Claude3Opus
| Self::Claude3Sonnet
| Self::Claude3_5Haiku
@ -208,4 +235,253 @@ impl Model {
_ => 1.0,
}
}
/// Returns whether this model accepts tool definitions in a request.
///
/// Reports `true` for the Anthropic Claude 3 family, Amazon Nova, AI21
/// Jamba 1.5 and Cohere Command R models, and `false` for everything else.
pub fn supports_tool_use(&self) -> bool {
    match self {
        // Anthropic Claude 3 models (all support tool use). `Claude3Haiku`
        // belongs here too; it was previously missed and fell through to
        // the `false` arm.
        Self::Claude3Opus
        | Self::Claude3Sonnet
        | Self::Claude3Haiku
        | Self::Claude3_5Sonnet
        | Self::Claude3_5SonnetV2
        | Self::Claude3_7Sonnet
        | Self::Claude3_7SonnetThinking
        | Self::Claude3_5Haiku => true,
        // Amazon Nova models (all support tool use)
        Self::AmazonNovaPro | Self::AmazonNovaLite | Self::AmazonNovaMicro => true,
        // AI21 Jamba 1.5 models support tool use
        Self::AI21Jamba15LargeV1 | Self::AI21Jamba15MiniV1 => true,
        // Cohere Command R models support tool use
        Self::CohereCommandRV1 | Self::CohereCommandRPlusV1 => true,
        // All other models don't support tool use,
        // including Meta Llama 3.2, AI21 Jurassic, and others.
        _ => false,
    }
}
/// Returns the operating mode associated with this model.
///
/// Only `Claude3_7SonnetThinking` runs in thinking mode, with a budget of
/// 4096 tokens; every other model uses the default mode.
pub fn mode(&self) -> BedrockModelMode {
    if matches!(self, Model::Claude3_7SonnetThinking) {
        BedrockModelMode::Thinking {
            budget_tokens: Some(4096),
        }
    } else {
        BedrockModelMode::Default
    }
}
/// Resolves the model ID to use for cross-region inference from `region`.
///
/// The region is first mapped to a profile group (`us-gov`, `us`, `eu` or
/// `apac`). When the model has an inference profile in that group the result
/// is `"<group>.<model id>"`; otherwise the plain model ID is returned.
///
/// # Errors
///
/// Returns an error when `region` does not belong to any known group. This
/// check runs before the model is inspected, so it applies to every model,
/// custom ones included.
pub fn cross_region_inference_id(&self, region: &str) -> Result<String, anyhow::Error> {
    let group = match region {
        r if r.starts_with("us-gov-") => "us-gov",
        r if r.starts_with("us-") => "us",
        r if r.starts_with("eu-") => "eu",
        r if r.starts_with("ap-") => "apac",
        "me-central-1" | "me-south-1" => "apac",
        // Canada and South America regions default to the US profiles.
        r if r.starts_with("ca-") || r.starts_with("sa-") => "us",
        // Unknown region
        _ => return Err(anyhow!("Unsupported Region")),
    };

    let base_id = self.id();

    let has_profile = match (self, group) {
        // Custom models can't have cross-region inference IDs.
        (Model::Custom { .. }, _) => false,

        // Prefixed in every region group (this includes US Gov).
        (
            Model::Claude3_5Sonnet
            | Model::Claude3Haiku
            | Model::Claude3Sonnet
            | Model::AmazonNovaLite
            | Model::AmazonNovaMicro
            | Model::AmazonNovaPro,
            _,
        ) => true,

        // Prefixed in the US and APAC groups only.
        (Model::Claude3_5SonnetV2, "us" | "apac") => true,

        // Prefixed in the US and EU groups only.
        (Model::MetaLlama321BInstructV1 | Model::MetaLlama323BInstructV1, "us" | "eu") => true,

        // Prefixed in the US group only.
        (
            Model::Claude3Opus
            | Model::Claude3_7Sonnet
            | Model::Claude3_7SonnetThinking
            | Model::MetaLlama38BInstructV1
            | Model::MetaLlama370BInstructV1
            | Model::MetaLlama318BInstructV1
            | Model::MetaLlama318BInstructV1_128k
            | Model::MetaLlama3170BInstructV1
            | Model::MetaLlama3170BInstructV1_128k
            | Model::MetaLlama3211BInstructV1
            | Model::MetaLlama3290BInstructV1,
            "us",
        ) => true,

        // Any other combination falls back to the plain model ID.
        _ => false,
    };

    if has_profile {
        Ok(format!("{}.{}", group, base_id))
    } else {
        Ok(base_id.into())
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Resolves every `(model, region, expected ID)` case through
    /// `Model::cross_region_inference_id` and compares the outcome.
    #[track_caller]
    fn check(cases: &[(Model, &str, &str)]) -> anyhow::Result<()> {
        for &(ref model, region, expected) in cases {
            let resolved = model.cross_region_inference_id(region)?;
            assert_eq!(resolved, expected, "{:?} in {}", model, region);
        }
        Ok(())
    }

    #[test]
    fn test_us_region_inference_ids() -> anyhow::Result<()> {
        // US regions resolve to the `us.` profiles.
        check(&[
            (
                Model::Claude3_5SonnetV2,
                "us-east-1",
                "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
            ),
            (
                Model::Claude3_5SonnetV2,
                "us-west-2",
                "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
            ),
            (Model::AmazonNovaPro, "us-east-2", "us.amazon.nova-pro-v1:0"),
        ])
    }

    #[test]
    fn test_eu_region_inference_ids() -> anyhow::Result<()> {
        // European regions resolve to the `eu.` profiles.
        check(&[
            (
                Model::Claude3Sonnet,
                "eu-west-1",
                "eu.anthropic.claude-3-sonnet-20240229-v1:0",
            ),
            (
                Model::AmazonNovaMicro,
                "eu-north-1",
                "eu.amazon.nova-micro-v1:0",
            ),
        ])
    }

    #[test]
    fn test_apac_region_inference_ids() -> anyhow::Result<()> {
        // Asia-Pacific regions resolve to the `apac.` profiles.
        check(&[
            (
                Model::Claude3_5SonnetV2,
                "ap-northeast-1",
                "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
            ),
            (
                Model::AmazonNovaLite,
                "ap-south-1",
                "apac.amazon.nova-lite-v1:0",
            ),
        ])
    }

    #[test]
    fn test_gov_region_inference_ids() -> anyhow::Result<()> {
        // Government regions resolve to the `us-gov.` profiles.
        check(&[
            (
                Model::Claude3_5Sonnet,
                "us-gov-east-1",
                "us-gov.anthropic.claude-3-5-sonnet-20240620-v1:0",
            ),
            (
                Model::Claude3Haiku,
                "us-gov-west-1",
                "us-gov.anthropic.claude-3-haiku-20240307-v1:0",
            ),
        ])
    }

    #[test]
    fn test_meta_models_inference_ids() -> anyhow::Result<()> {
        // Meta models are prefixed where a profile exists.
        check(&[
            (
                Model::MetaLlama370BInstructV1,
                "us-east-1",
                "us.meta.llama3-70b-instruct-v1:0",
            ),
            (
                Model::MetaLlama321BInstructV1,
                "eu-west-1",
                "eu.meta.llama3-2-1b-instruct-v1:0",
            ),
        ])
    }

    #[test]
    fn test_mistral_models_inference_ids() -> anyhow::Result<()> {
        // Mistral models don't follow the regional prefix pattern,
        // so their original IDs come back unchanged.
        check(&[
            (
                Model::MistralMistralLarge2402V1,
                "us-east-1",
                "mistral.mistral-large-2402-v1:0",
            ),
            (
                Model::MistralMixtral8x7BInstructV0,
                "eu-west-1",
                "mistral.mixtral-8x7b-instruct-v0:1",
            ),
        ])
    }

    #[test]
    fn test_ai21_models_inference_ids() -> anyhow::Result<()> {
        // AI21 models don't follow the regional prefix pattern,
        // so their original IDs come back unchanged.
        check(&[
            (Model::AI21J2UltraV1, "us-east-1", "ai21.j2-ultra-v1"),
            (
                Model::AI21JambaInstructV1,
                "eu-west-1",
                "ai21.jamba-instruct-v1:0",
            ),
        ])
    }

    #[test]
    fn test_cohere_models_inference_ids() -> anyhow::Result<()> {
        // Cohere models don't follow the regional prefix pattern,
        // so their original IDs come back unchanged.
        check(&[
            (Model::CohereCommandRV1, "us-east-1", "cohere.command-r-v1:0"),
            (
                Model::CohereCommandTextV14_4k,
                "ap-southeast-1",
                "cohere.command-text-v14:7:4k",
            ),
        ])
    }

    #[test]
    fn test_custom_model_inference_ids() -> anyhow::Result<()> {
        let custom_model = Model::Custom {
            name: "custom.my-model-v1:0".to_string(),
            max_tokens: 100000,
            display_name: Some("My Custom Model".to_string()),
            max_output_tokens: Some(8192),
            default_temperature: Some(0.7),
        };

        // A custom model keeps its name unchanged.
        check(&[(custom_model, "us-east-1", "custom.my-model-v1:0")])
    }
}