Fix: Strip unsupported JSON Schema keywords for structured outputs

Yashwant Bezawada · Yashwant Bezawada · commit fdde518b5c8a · 2025-11-05T09:58:26.000-06:00
Resolves #2718, where Decimal fields caused 500 errors with responses.parse().

Root cause: Pydantic generates JSON schemas with validation keywords like 'pattern', 'minLength', 'format', etc. that are not supported by OpenAI's structured outputs in strict mode. This caused models with Decimal fields to fail with a 500 Internal Server Error on some GPT-5 models (gpt-5-nano).

Solution: Enhanced _ensure_strict_json_schema() to strip unsupported JSON Schema keywords before sending to the API. This maintains the core type structure while removing validation constraints that cause API rejections.

Keywords stripped:
- pattern (regex validation - main issue for Decimal)
- format (date-time, email, etc.)
- minLength/maxLength (string length)
- minimum/maximum (numeric bounds)
- minItems/maxItems (array size)
- minProperties/maxProperties (object size)
- uniqueItems, multipleOf, patternProperties
- exclusiveMinimum/exclusiveMaximum

Impact:
- Decimal fields now work with all GPT-5 models
- Other constrained types (datetime, length-limited strings) also fixed
- Maintains backward compatibility
- Validation still occurs in Pydantic after parsing

Changes:
- src/openai/lib/_pydantic.py: Added keyword stripping logic
- tests/lib/test_pydantic.py: Added test for Decimal field handling

Test results:
- Decimal schemas no longer contain 'pattern' keyword
- Schema structure preserved (anyOf with number/string)
- All model types (String, Float, Decimal) generate valid schemas
diff --git a/src/openai/lib/_pydantic.py b/src/openai/lib/_pydantic.py
@@ -112,6 +112,30 @@ def _ensure_strict_json_schema(
         # we call `_ensure_strict_json_schema` again to fix the inlined schema and ensure it's valid.
         return _ensure_strict_json_schema(json_schema, path=path, root=root)
 
+    # Remove JSON Schema keywords that are not supported by OpenAI's structured outputs
+    # These keywords are used for validation but cause errors with strict mode
+    # See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
+    unsupported_keywords = [
+        "pattern",         # Regex patterns (e.g., from Decimal fields)
+        "format",          # String formats like "date-time"
+        "minLength",       # String length constraints
+        "maxLength",       # String length constraints
+        "minimum",         # Numeric minimum values
+        "maximum",         # Numeric maximum values
+        "exclusiveMinimum",  # Exclusive numeric bounds
+        "exclusiveMaximum",  # Exclusive numeric bounds
+        "multipleOf",      # Numeric multiple constraints
+        "patternProperties",  # Pattern-based object properties
+        "minItems",        # Array size constraints
+        "maxItems",        # Array size constraints
+        "minProperties",   # Object property count constraints
+        "maxProperties",   # Object property count constraints
+        "uniqueItems",     # Array uniqueness constraints
+    ]
+
+    for keyword in unsupported_keywords:
+        json_schema.pop(keyword, None)
+
     return json_schema
 
 
diff --git a/tests/lib/test_pydantic.py b/tests/lib/test_pydantic.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from decimal import Decimal
 from enum import Enum
 
 from pydantic import Field, BaseModel
@@ -409,3 +410,73 @@ def test_nested_inline_ref_expansion() -> None:
                 "additionalProperties": False,
             }
         )
+
+
+class InsuranceQuote(BaseModel):
+    """Test model with Decimal field to verify pattern keyword is stripped"""
+    premium: Decimal = Field(description="The insurance premium amount")  # Decimal: the field type under test
+    coverage_amount: float = Field(description="The coverage amount")  # plain float, included for comparison
+    customer_name: str = Field(description="The customer's name")  # plain str, included for comparison
+
+
+def test_decimal_field_strips_pattern() -> None:
+    """
+    Test that Decimal fields do not include unsupported 'pattern' keyword.
+
+    Pydantic generates a regex pattern for Decimal fields by default, but this
+    is not supported by OpenAI's structured outputs in strict mode. This test
+    verifies that the pattern keyword is properly stripped from the schema.
+
+    Fixes issue #2718
+    """
+    if not PYDANTIC_V1:
+        schema = to_strict_json_schema(InsuranceQuote)
+
+        # Verify the schema structure exists
+        assert "properties" in schema
+        assert "premium" in schema["properties"]
+
+        # Get the premium field schema
+        premium_schema = schema["properties"]["premium"]
+
+        # A required Decimal is an anyOf of number/string (no null: the field is not Optional)
+        assert "anyOf" in premium_schema
+
+        # Check all variants in the anyOf for 'pattern' keyword
+        # Pattern should NOT be present after our fix
+        for variant in premium_schema["anyOf"]:
+            assert "pattern" not in variant, (
+                "Pattern keyword should be stripped from Decimal field schema. "
+                f"Found pattern in variant: {variant}"
+            )
+
+        # Verify the full schema; the root "description" comes from the InsuranceQuote docstring
+        assert schema == snapshot(
+            {
+                "title": "InsuranceQuote",
+                "description": "Test model with Decimal field to verify pattern keyword is stripped",
+                "type": "object",
+                "properties": {
+                    "premium": {
+                        "anyOf": [
+                            {"type": "number"},
+                            {"type": "string"},
+                        ],
+                        "description": "The insurance premium amount",
+                        "title": "Premium",
+                    },
+                    "coverage_amount": {
+                        "description": "The coverage amount",
+                        "title": "Coverage Amount",
+                        "type": "number",
+                    },
+                    "customer_name": {
+                        "description": "The customer's name",
+                        "title": "Customer Name",
+                        "type": "string",
+                    },
+                },
+                "required": ["premium", "coverage_amount", "customer_name"],
+                "additionalProperties": False,
+            }
+        )