From 1a36a0cdd48183adc32903d9b3df7ea8ccab1bd7 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Wed, 5 Nov 2025 11:09:30 -0800
Subject: [PATCH] Remove unwrap_tensor_subclass

---
 .ci/docker/ci_commit_pins/pytorch.txt                   | 2 +-
 backends/test/harness/stages/quantize.py                | 4 ----
 backends/vulkan/test/test_vulkan_delegate.py            | 4 ----
 backends/xnnpack/test/ops/test_linear.py                | 2 --
 examples/apple/coreml/llama/export.py                   | 3 ---
 examples/models/llama/source_transformation/quantize.py | 7 -------
 export/stages.py                                        | 2 --
 export/tests/test_export_stages.py                      | 8 +-------
 extension/llm/export/builder.py                         | 6 ------
 torch_pin.py                                            | 2 +-
 10 files changed, 3 insertions(+), 37 deletions(-)

diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
index a8de771a69d..5664a975587 100644
--- a/.ci/docker/ci_commit_pins/pytorch.txt
+++ b/.ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-e6f766c7d750d40603eee3f66c5915bac606b3ea
+72df1db744431d24ee1a4c0e42e514426ce0d45f
diff --git a/backends/test/harness/stages/quantize.py b/backends/test/harness/stages/quantize.py
index 6c6036c8104..40505a4e4c0 100644
--- a/backends/test/harness/stages/quantize.py
+++ b/backends/test/harness/stages/quantize.py
@@ -16,7 +16,6 @@
 )
 from torchao.quantization.pt2e.quantizer import Quantizer
 from torchao.quantization.quant_api import quantize_
-from torchao.utils import unwrap_tensor_subclass
 
 
 class Quantize(Stage):
@@ -111,9 +110,6 @@ def run(
         # Apply quantize_ to the model
         quantize_(artifact, self.config, self.filter_fn)
 
-        # Unwrap tensor subclasses for export compatibility
-        unwrap_tensor_subclass(artifact)
-
         self.quantized_module = artifact
 
     @property
diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
index f92cea64767..3faa131d5b7 100644
--- a/backends/vulkan/test/test_vulkan_delegate.py
+++ b/backends/vulkan/test/test_vulkan_delegate.py
@@ -43,7 +43,6 @@
 
 from torchao.quantization.pt2e.quantizer import Quantizer
 from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-from torchao.utils import unwrap_tensor_subclass
 
 try:
     ctypes.CDLL("libvulkan.so.1")
@@ -2363,7 +2362,6 @@ def apply_quantization(self):
                 granularity=self.quant_granularity,
             )
             quantize_(self, q_config)
-            unwrap_tensor_subclass(self)
             return self
 
         # Test with GEMV pattern (batch_size=1, seq_len=1)
@@ -2686,7 +2684,6 @@ def apply_8da4w_quantization(self):
                 quantize_,
             )
             from torchao.quantization.granularity import PerGroup
-            from torchao.utils import unwrap_tensor_subclass
 
             quantize_(
                 self,
@@ -2694,7 +2691,6 @@ def apply_8da4w_quantization(self):
                     weight_dtype=torch.int4, granularity=PerGroup(self.group_size)
                 ),
             )
-            unwrap_tensor_subclass(self)
             return self
 
         # Test with GEMV pattern (batch_size=1, seq_len=1)
diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py
index dc92a9542a9..14a3460e1f7 100644
--- a/backends/xnnpack/test/ops/test_linear.py
+++ b/backends/xnnpack/test/ops/test_linear.py
@@ -39,7 +39,6 @@
         Int8DynamicActivationIntxWeightConfig,
         quantize_,
     )
-    from torchao.utils import unwrap_tensor_subclass
 
     torchao_installed = True
 except:
@@ -400,7 +399,6 @@ def _test_groupwise_dq_linear(
                 weight_granularity=PerGroup(group_size),
             ),
         )
-        unwrap_tensor_subclass(mod)
         DynamicallyQuantizedPartitioner = XnnpackPartitioner(
             config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
             per_op_mode=True,
diff --git a/examples/apple/coreml/llama/export.py b/examples/apple/coreml/llama/export.py
index af2fa3c74ee..84460a9bf54 100644
--- a/examples/apple/coreml/llama/export.py
+++ b/examples/apple/coreml/llama/export.py
@@ -28,7 +28,6 @@
 
 from torchao.quantization.granularity import PerAxis, PerGroup
 from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-from torchao.utils import unwrap_tensor_subclass
 
 
 def main() -> None:
@@ -193,8 +192,6 @@ def main() -> None:
     )
     example_inputs = input_manager.get_inputs(tokens=[0])
 
-    model = unwrap_tensor_subclass(model)
-
     ep = torch.export.export(model, example_inputs, strict=True)
     print("Exported program")
     print(ep)
diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py
index 9e49f9e4e15..8c0e63953e9 100644
--- a/examples/models/llama/source_transformation/quantize.py
+++ b/examples/models/llama/source_transformation/quantize.py
@@ -122,7 +122,6 @@ def quantize(  # noqa C901
         Int8DynamicActivationIntxWeightConfig,
         quantize_,
     )
-    from torchao.utils import unwrap_tensor_subclass
 
     with torch.no_grad():
         # Computation dtype is fixed to fp32 in the implementation of quantize_, so
@@ -142,7 +141,6 @@ def quantize(  # noqa C901
                 ),
             ),
         )
-        model = unwrap_tensor_subclass(model)
         if verbose:
             print("quantized model:", model)
         return model
@@ -156,7 +154,6 @@ def quantize(  # noqa C901
             quantize_,
         )
         from torchao.quantization.granularity import PerGroup
-        from torchao.utils import unwrap_tensor_subclass
 
         def filter_fn(m, fqn):
             is_linear = isinstance(m, nn.Linear)
@@ -181,8 +178,6 @@ def filter_fn(m, fqn):
             filter_fn=filter_fn,
         )
 
-        model = unwrap_tensor_subclass(model)
-
         # TODO: deal with checkpoint / computation dtype decoupling.
 
         if verbose:
@@ -191,7 +186,6 @@ def filter_fn(m, fqn):
     elif qmode == "4w":
         from torchao.quantization.granularity import PerGroup
         from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-        from torchao.utils import unwrap_tensor_subclass
 
         q_group_size = 256 if group_size is None else group_size
         q_config = IntxWeightOnlyConfig(
@@ -204,7 +198,6 @@ def filter_fn(m, fqn):
             ),
         )
         quantize_(model, q_config)
-        model = unwrap_tensor_subclass(model)
 
         return model
     else:
diff --git a/export/stages.py b/export/stages.py
index 3be801c6a14..3499b5eb655 100644
--- a/export/stages.py
+++ b/export/stages.py
@@ -26,7 +26,6 @@
     ComposableQuantizer,
     Quantizer as TorchAOPT2EQuantizer,
 )
-from torchao.utils import unwrap_tensor_subclass
 
 
 class PipelineArtifact:
@@ -344,7 +343,6 @@ def run(self, artifact: PipelineArtifact) -> None:
 
             ao_config = self._quantization_recipe.ao_quantization_configs[0]
             quantize_(model, ao_config.ao_base_config, ao_config.filter_fn)
-            unwrap_tensor_subclass(model)
 
         self._artifact = artifact.copy_with_new_data(self._transformed_models)
 
diff --git a/export/tests/test_export_stages.py b/export/tests/test_export_stages.py
index 4e8144bd487..02a21788535 100644
--- a/export/tests/test_export_stages.py
+++ b/export/tests/test_export_stages.py
@@ -280,10 +280,7 @@ def test_source_transform_stage_no_quantization(self) -> None:
         self.assertEqual(result_artifact.data, self.models_dict)
 
     @patch("executorch.export.stages.quantize_")
-    @patch("executorch.export.stages.unwrap_tensor_subclass")
-    def test_run_with_ao_quantization_configs(
-        self, mock_unwrap: Mock, mock_quantize: Mock
-    ) -> None:
+    def test_run_with_ao_quantization_configs(self, mock_quantize: Mock) -> None:
         from torchao.core.config import AOBaseConfig
 
         mock_config = Mock(spec=AOBaseConfig)
@@ -308,9 +305,6 @@ def test_run_with_ao_quantization_configs(
         self.assertEqual(call_args[1], mock_config)
         self.assertEqual(call_args[2], mock_filter_fn)
 
-        # Verify unwrap_tensor_subclass was called once (with the copied model)
-        self.assertEqual(mock_unwrap.call_count, 1)
-
         # Verify that the original models_dict is unchanged
         self.assertEqual(models_dict, {"forward": self.model})
 
diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py
index f8c556f351c..b829239d1e2 100644
--- a/extension/llm/export/builder.py
+++ b/extension/llm/export/builder.py
@@ -38,7 +38,6 @@
 from torch.nn.attention import SDPBackend
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torchao.quantization.pt2e.quantizer import ComposableQuantizer, Quantizer
-from torchao.utils import unwrap_tensor_subclass
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
 logging.basicConfig(level=logging.INFO, format=FORMAT)
@@ -203,11 +202,6 @@ def _get_edge_config(self) -> EdgeCompileConfig:
         return edge_config
 
     def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
-        if module is not None:
-            unwrap_tensor_subclass(module)
-        else:
-            unwrap_tensor_subclass(self.model)
-
         dynamic_shape = self._get_dynamic_shape()
         # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
diff --git a/torch_pin.py b/torch_pin.py
index 5e54c848d13..7d20cbcf074 100644
--- a/torch_pin.py
+++ b/torch_pin.py
@@ -1,2 +1,2 @@
 TORCH_VERSION = "2.10.0"
-NIGHTLY_VERSION = "dev20251015"
+NIGHTLY_VERSION = "dev20251104"
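-- 
Note on the pattern (trailing comment, ignored when the patch is applied):
every hunk above removes the same post-quantization round-trip. With the
newer PyTorch pin bumped in this patch, torch.export is expected to trace
torchao's quantized tensor-subclass weights directly, so the
unwrap_tensor_subclass() step between quantize_() and export is dropped.
A minimal before/after sketch; the toy module, input shape, and group size
are illustrative, not from this patch:

    import torch
    from torchao.quantization.granularity import PerGroup
    from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_

    # Toy stand-in for the models quantized in the hunks above.
    model = torch.nn.Sequential(torch.nn.Linear(256, 256))

    # Same config style as the 4-bit weight-only hunks in this patch.
    quantize_(
        model,
        IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)),
    )

    # Previously required between quantize_() and export, now removed:
    #   from torchao.utils import unwrap_tensor_subclass
    #   model = unwrap_tensor_subclass(model)

    # Export consumes the quantized module directly (assumes a PyTorch
    # build at least as new as the nightly pin above).
    ep = torch.export.export(model, (torch.randn(1, 256),), strict=True)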