Migrate to schemars version 1.0 (#33635)

The major change in schemars 1.0 is that now schemas are represented as
plain json values instead of specialized datatypes. This allows for more
concise construction and manipulation.

This change also improves how settings schemas are generated. Each top
level settings type was being generated as a full root schema including
the definitions it references, and then these were merged. This meant
generating all shared definitions multiple times, and might have bugs in
cases where there are two types with the same names.

Now instead the schemar generator's `definitions` are built up as they
normally are and the `Settings` trait no longer has a special
`json_schema` method. To handle types that have schema that vary at
runtime (`FontFamilyName`, `ThemeName`, etc), values of
`ParameterizedJsonSchema` are collected by `inventory`, and the schema
definitions for these types are replaced.

To help check that this doesn't break anything, I tried to minimize the
overall [schema
diff](https://gist.github.com/mgsloan/1de549def20399d6f37943a3c1583ee7)
with some patches to make the order more consistent + schemas also
sorted with `jq -S .`. A skim of the diff shows that the diffs come
from:

* `enum: ["value"]` turning into `const: "value"`
* Differences in handling of newlines for "description"
* Schemas for generic types no longer including the parameter name, now
all disambiguation is with numeric suffixes
* Enums now using `oneOf` instead of `anyOf`.

Release Notes:

- N/A
This commit is contained in:
Michael Sloan 2025-06-30 15:07:28 -06:00 committed by GitHub
parent a2e786e0f9
commit 5fafab6e52
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
42 changed files with 714 additions and 963 deletions

View file

@ -1,8 +1,9 @@
use anyhow::Result;
use language_model::LanguageModelToolSchemaFormat;
use schemars::{
JsonSchema,
schema::{RootSchema, Schema, SchemaObject},
JsonSchema, Schema,
generate::SchemaSettings,
transform::{Transform, transform_subschemas},
};
pub fn json_schema_for<T: JsonSchema>(
@ -13,7 +14,7 @@ pub fn json_schema_for<T: JsonSchema>(
}
fn schema_to_json(
schema: &RootSchema,
schema: &Schema,
format: LanguageModelToolSchemaFormat,
) -> Result<serde_json::Value> {
let mut value = serde_json::to_value(schema)?;
@ -21,58 +22,42 @@ fn schema_to_json(
Ok(value)
}
fn root_schema_for<T: JsonSchema>(format: LanguageModelToolSchemaFormat) -> RootSchema {
fn root_schema_for<T: JsonSchema>(format: LanguageModelToolSchemaFormat) -> Schema {
let mut generator = match format {
LanguageModelToolSchemaFormat::JsonSchema => schemars::SchemaGenerator::default(),
LanguageModelToolSchemaFormat::JsonSchemaSubset => {
schemars::r#gen::SchemaSettings::default()
.with(|settings| {
settings.meta_schema = None;
settings.inline_subschemas = true;
settings
.visitors
.push(Box::new(TransformToJsonSchemaSubsetVisitor));
})
.into_generator()
}
LanguageModelToolSchemaFormat::JsonSchema => SchemaSettings::draft07().into_generator(),
// TODO: Gemini docs mention using a subset of OpenAPI 3, so this may benefit from using
// `SchemaSettings::openapi3()`.
LanguageModelToolSchemaFormat::JsonSchemaSubset => SchemaSettings::draft07()
.with(|settings| {
settings.meta_schema = None;
settings.inline_subschemas = true;
})
.with_transform(ToJsonSchemaSubsetTransform)
.into_generator(),
};
generator.root_schema_for::<T>()
}
#[derive(Debug, Clone)]
struct TransformToJsonSchemaSubsetVisitor;
struct ToJsonSchemaSubsetTransform;
impl schemars::visit::Visitor for TransformToJsonSchemaSubsetVisitor {
fn visit_root_schema(&mut self, root: &mut RootSchema) {
schemars::visit::visit_root_schema(self, root)
}
fn visit_schema(&mut self, schema: &mut Schema) {
schemars::visit::visit_schema(self, schema)
}
fn visit_schema_object(&mut self, schema: &mut SchemaObject) {
impl Transform for ToJsonSchemaSubsetTransform {
fn transform(&mut self, schema: &mut Schema) {
// Ensure that the type field is not an array, this happens when we use
// Option<T>, the type will be [T, "null"].
if let Some(instance_type) = schema.instance_type.take() {
schema.instance_type = match instance_type {
schemars::schema::SingleOrVec::Single(t) => {
Some(schemars::schema::SingleOrVec::Single(t))
if let Some(type_field) = schema.get_mut("type") {
if let Some(types) = type_field.as_array() {
if let Some(first_type) = types.first() {
*type_field = first_type.clone();
}
schemars::schema::SingleOrVec::Vec(items) => items
.into_iter()
.next()
.map(schemars::schema::SingleOrVec::from),
};
}
// One of is not supported, use anyOf instead.
if let Some(subschema) = schema.subschemas.as_mut() {
if let Some(one_of) = subschema.one_of.take() {
subschema.any_of = Some(one_of);
}
}
schemars::visit::visit_schema_object(self, schema)
// oneOf is not supported, use anyOf instead
if let Some(one_of) = schema.remove("oneOf") {
schema.insert("anyOf".to_string(), one_of);
}
transform_subschemas(self, schema);
}
}