Skip to content

Commit fdde518

Browse files
author
Yashwant Bezawada
committed
Fix: Strip unsupported JSON Schema keywords for structured outputs
Resolves #2718, where Decimal fields caused 500 errors with responses.parse(). Root cause: Pydantic generates JSON schemas with validation keywords such as 'pattern', 'minLength', and 'format' that are not supported by OpenAI's structured outputs in strict mode. As a result, models with Decimal fields failed with a 500 Internal Server Error on some GPT-5 models (e.g. gpt-5-nano). Solution: _ensure_strict_json_schema() now strips unsupported JSON Schema keywords before the schema is sent to the API, preserving the core type structure while removing the validation constraints that cause API rejections. Keywords stripped: pattern (regex validation; the main issue for Decimal); format (date-time, email, etc.); minLength/maxLength (string length); minimum/maximum (numeric bounds); minItems/maxItems (array size); minProperties/maxProperties (object size); uniqueItems; multipleOf; patternProperties; exclusiveMinimum/exclusiveMaximum. Impact: Decimal fields now work with all GPT-5 models; other constrained types (datetime, length-limited strings) are also fixed; backward compatibility is maintained; validation still occurs in Pydantic after parsing. Changes: src/openai/lib/_pydantic.py adds the keyword-stripping logic; tests/lib/test_pydantic.py adds a test for Decimal field handling. Test results: Decimal schemas no longer contain the 'pattern' keyword; the schema structure is preserved (anyOf with number/string); all model types (String, Float, Decimal) generate valid schemas.
1 parent 4e88565 commit fdde518

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

src/openai/lib/_pydantic.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,30 @@ def _ensure_strict_json_schema(
112112
# we call `_ensure_strict_json_schema` again to fix the inlined schema and ensure it's valid.
113113
return _ensure_strict_json_schema(json_schema, path=path, root=root)
114114

115+
# Remove JSON Schema keywords that are not supported by OpenAI's structured outputs
116+
# These keywords are used for validation but cause errors with strict mode
117+
# See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
118+
unsupported_keywords = [
119+
"pattern", # Regex patterns (e.g., from Decimal fields)
120+
"format", # String formats like "date-time"
121+
"minLength", # String length constraints
122+
"maxLength", # String length constraints
123+
"minimum", # Numeric minimum values
124+
"maximum", # Numeric maximum values
125+
"exclusiveMinimum", # Exclusive numeric bounds
126+
"exclusiveMaximum", # Exclusive numeric bounds
127+
"multipleOf", # Numeric multiple constraints
128+
"patternProperties", # Pattern-based object properties
129+
"minItems", # Array size constraints
130+
"maxItems", # Array size constraints
131+
"minProperties", # Object property count constraints
132+
"maxProperties", # Object property count constraints
133+
"uniqueItems", # Array uniqueness constraints
134+
]
135+
136+
for keyword in unsupported_keywords:
137+
json_schema.pop(keyword, None)
138+
115139
return json_schema
116140

117141

tests/lib/test_pydantic.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from decimal import Decimal
34
from enum import Enum
45

56
from pydantic import Field, BaseModel
@@ -409,3 +410,73 @@ def test_nested_inline_ref_expansion() -> None:
409410
"additionalProperties": False,
410411
}
411412
)
413+
414+
415+
class InsuranceQuote(BaseModel):
    """Test model with Decimal field to verify pattern keyword is stripped"""

    # NOTE: Pydantic v2 copies the docstring above into the generated schema's
    # top-level "description", so rewording it changes the schema under test.

    # Decimal is the field whose string variant carries the regex "pattern".
    premium: Decimal = Field(..., description="The insurance premium amount")
    # Plain float and str fields act as controls with no validation keywords.
    coverage_amount: float = Field(..., description="The coverage amount")
    customer_name: str = Field(..., description="The customer's name")
420+
421+
422+
def test_decimal_field_strips_pattern() -> None:
    """Check that a Decimal field's strict schema carries no 'pattern' keyword.

    Pydantic generates a regex pattern for the string variant of a Decimal
    field, and this test verifies that `to_strict_json_schema` removes it.
    The assertions only run under Pydantic v2; under v1 the test is a no-op.

    Fixes issue #2718
    """
    if PYDANTIC_V1:
        # Nothing is asserted for Pydantic v1.
        return

    schema = to_strict_json_schema(InsuranceQuote)

    # Verify the schema structure exists
    assert "properties" in schema
    assert "premium" in schema["properties"]

    premium_schema = schema["properties"]["premium"]

    # A required (non-Optional) Decimal is rendered as an anyOf of
    # number/string; there is no null variant.
    assert "anyOf" in premium_schema

    # No variant of the anyOf may still carry the 'pattern' keyword.
    for variant in premium_schema["anyOf"]:
        assert "pattern" not in variant, (
            "Pattern keyword should be stripped from Decimal field schema. "
            f"Found pattern in variant: {variant}"
        )

    # Verify the full schema matches the expected structure (without pattern).
    # The top-level "description" comes from the InsuranceQuote docstring.
    assert schema == snapshot(
        {
            "description": "Test model with Decimal field to verify pattern keyword is stripped",
            "title": "InsuranceQuote",
            "type": "object",
            "properties": {
                "premium": {
                    "anyOf": [
                        {"type": "number"},
                        {"type": "string"},
                    ],
                    "description": "The insurance premium amount",
                    "title": "Premium",
                },
                "coverage_amount": {
                    "description": "The coverage amount",
                    "title": "Coverage Amount",
                    "type": "number",
                },
                "customer_name": {
                    "description": "The customer's name",
                    "title": "Customer Name",
                    "type": "string",
                },
            },
            "required": ["premium", "coverage_amount", "customer_name"],
            "additionalProperties": False,
        }
    )

0 commit comments

Comments
 (0)