Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions src/openai/lib/_pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ def _ensure_strict_json_schema(
if is_dict(items):
json_schema["items"] = _ensure_strict_json_schema(items, path=(*path, "items"), root=root)

# typed dictionaries
# { 'type': 'object', 'additionalProperties': {...} }
# Note: additionalProperties can be boolean (true/false) or a schema dict
additional_properties = json_schema.get("additionalProperties")
if is_dict(additional_properties):
json_schema["additionalProperties"] = _ensure_strict_json_schema(
additional_properties, path=(*path, "additionalProperties"), root=root
)

# unions
any_of = json_schema.get("anyOf")
if is_list(any_of):
Expand Down Expand Up @@ -112,6 +121,30 @@ def _ensure_strict_json_schema(
# we call `_ensure_strict_json_schema` again to fix the inlined schema and ensure it's valid.
return _ensure_strict_json_schema(json_schema, path=path, root=root)

# Remove JSON Schema keywords that are not supported by OpenAI's structured outputs
# These keywords are used for validation but cause errors with strict mode
# See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
unsupported_keywords = [
"pattern", # Regex patterns (e.g., from Decimal fields)
"format", # String formats like "date-time"
"minLength", # String length constraints
"maxLength", # String length constraints
"minimum", # Numeric minimum values
"maximum", # Numeric maximum values
"exclusiveMinimum", # Exclusive numeric bounds
"exclusiveMaximum", # Exclusive numeric bounds
"multipleOf", # Numeric multiple constraints
"patternProperties", # Pattern-based object properties
"minItems", # Array size constraints
"maxItems", # Array size constraints
"minProperties", # Object property count constraints
"maxProperties", # Object property count constraints
"uniqueItems", # Array uniqueness constraints
]

for keyword in unsupported_keywords:
json_schema.pop(keyword, None)

return json_schema


Expand Down
144 changes: 144 additions & 0 deletions tests/lib/test_pydantic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from decimal import Decimal
from enum import Enum

from pydantic import Field, BaseModel
Expand Down Expand Up @@ -409,3 +410,146 @@ def test_nested_inline_ref_expansion() -> None:
"additionalProperties": False,
}
)


class InsuranceQuote(BaseModel):
"""Test model with Decimal field to verify pattern keyword is stripped"""
premium: Decimal = Field(description="The insurance premium amount")
coverage_amount: float = Field(description="The coverage amount")
customer_name: str = Field(description="The customer's name")


def test_decimal_field_strips_pattern() -> None:
"""
Test that Decimal fields do not include unsupported 'pattern' keyword.

Pydantic generates a regex pattern for Decimal fields by default, but this
is not supported by OpenAI's structured outputs in strict mode. This test
verifies that the pattern keyword is properly stripped from the schema.

Fixes issue #2718
"""
if not PYDANTIC_V1:
schema = to_strict_json_schema(InsuranceQuote)

# Verify the schema structure exists
assert "properties" in schema
assert "premium" in schema["properties"]

# Get the premium field schema
premium_schema = schema["properties"]["premium"]

# Verify it's an anyOf with number/string/null options
assert "anyOf" in premium_schema

# Check all variants in the anyOf for 'pattern' keyword
# Pattern should NOT be present after our fix
for variant in premium_schema["anyOf"]:
assert "pattern" not in variant, (
"Pattern keyword should be stripped from Decimal field schema. "
"Found pattern in variant: " + str(variant)
)

# Verify the schema matches expected structure (without pattern)
assert schema == snapshot(
{
"title": "InsuranceQuote",
"type": "object",
"properties": {
"premium": {
"anyOf": [
{"type": "number"},
{"type": "string"},
{"type": "null"}
],
"description": "The insurance premium amount",
"title": "Premium",
},
"coverage_amount": {
"description": "The coverage amount",
"title": "Coverage Amount",
"type": "number",
},
"customer_name": {
"description": "The customer's name",
"title": "Customer Name",
"type": "string",
},
},
"required": ["premium", "coverage_amount", "customer_name"],
"additionalProperties": False,
}
)


class ProductPricing(BaseModel):
"""Test model with Dict[str, Decimal] to verify pattern is stripped from additionalProperties"""
prices: dict[str, Decimal] = Field(description="Product prices by region")
product_name: str = Field(description="The product name")


def test_dict_decimal_strips_pattern_in_additional_properties() -> None:
"""
Test that Dict[str, Decimal] fields strip pattern from additionalProperties.

When Pydantic generates schemas for typed dictionaries (Dict[str, Decimal]),
it uses additionalProperties with a Decimal schema that includes a regex
pattern. This test verifies that pattern keywords are stripped from nested
schemas within additionalProperties.

Addresses Codex review feedback on PR #2733
"""
if not PYDANTIC_V1:
schema = to_strict_json_schema(ProductPricing)

# Verify the schema structure exists
assert "properties" in schema
assert "prices" in schema["properties"]

# Get the prices field schema
prices_schema = schema["properties"]["prices"]

# Should be an object with additionalProperties
assert prices_schema.get("type") == "object"
assert "additionalProperties" in prices_schema

# Get the additionalProperties schema (Decimal schema)
add_props = prices_schema["additionalProperties"]
assert "anyOf" in add_props

# Check all variants in anyOf for 'pattern' keyword
# Pattern should NOT be present after our fix
for variant in add_props["anyOf"]:
assert "pattern" not in variant, (
"Pattern keyword should be stripped from additionalProperties Decimal schema. "
"Found pattern in variant: " + str(variant)
)

# Verify the full schema matches expected structure
assert schema == snapshot(
{
"description": "Test model with Dict[str, Decimal] to verify pattern is stripped from additionalProperties",
"properties": {
"prices": {
"additionalProperties": {
"anyOf": [
{"type": "number"},
{"type": "string"},
]
},
"description": "Product prices by region",
"title": "Prices",
"type": "object",
},
"product_name": {
"description": "The product name",
"title": "Product Name",
"type": "string",
},
},
"required": ["prices", "product_name"],
"title": "ProductPricing",
"type": "object",
"additionalProperties": False,
}
)