From 1a36a0cdd48183adc32903d9b3df7ea8ccab1bd7 Mon Sep 17 00:00:00 2001
From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com>
Date: Wed, 5 Nov 2025 11:09:30 -0800
Subject: [PATCH] Remove unwrap_tensor_subclass

---
 .ci/docker/ci_commit_pins/pytorch.txt                   | 2 +-
 backends/test/harness/stages/quantize.py                | 4 ----
 backends/vulkan/test/test_vulkan_delegate.py            | 4 ----
 backends/xnnpack/test/ops/test_linear.py                | 2 --
 examples/apple/coreml/llama/export.py                   | 3 ---
 examples/models/llama/source_transformation/quantize.py | 7 -------
 export/stages.py                                        | 2 --
 export/tests/test_export_stages.py                      | 8 +-------
 extension/llm/export/builder.py                         | 6 ------
 torch_pin.py                                            | 2 +-
 10 files changed, 3 insertions(+), 37 deletions(-)

diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
index a8de771a69d..5664a975587 100644
--- a/.ci/docker/ci_commit_pins/pytorch.txt
+++ b/.ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-e6f766c7d750d40603eee3f66c5915bac606b3ea
+72df1db744431d24ee1a4c0e42e514426ce0d45f
diff --git a/backends/test/harness/stages/quantize.py b/backends/test/harness/stages/quantize.py
index 6c6036c8104..40505a4e4c0 100644
--- a/backends/test/harness/stages/quantize.py
+++ b/backends/test/harness/stages/quantize.py
@@ -16,7 +16,6 @@
 )
 from torchao.quantization.pt2e.quantizer import Quantizer
 from torchao.quantization.quant_api import quantize_
-from torchao.utils import unwrap_tensor_subclass
 
 
 class Quantize(Stage):
@@ -111,9 +110,6 @@ def run(
         # Apply quantize_ to the model
         quantize_(artifact, self.config, self.filter_fn)
 
-        # Unwrap tensor subclasses for export compatibility
-        unwrap_tensor_subclass(artifact)
-
         self.quantized_module = artifact
 
     @property
diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
index f92cea64767..3faa131d5b7 100644
--- a/backends/vulkan/test/test_vulkan_delegate.py
+++ b/backends/vulkan/test/test_vulkan_delegate.py
@@ -43,7 +43,6 @@
 
 from torchao.quantization.pt2e.quantizer import Quantizer
 from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-from torchao.utils import unwrap_tensor_subclass
 
 try:
     ctypes.CDLL("libvulkan.so.1")
@@ -2363,7 +2362,6 @@ def apply_quantization(self):
                 granularity=self.quant_granularity,
             )
             quantize_(self, q_config)
-            unwrap_tensor_subclass(self)
             return self
 
         # Test with GEMV pattern (batch_size=1, seq_len=1)
@@ -2686,7 +2684,6 @@ def apply_8da4w_quantization(self):
                 quantize_,
             )
             from torchao.quantization.granularity import PerGroup
-            from torchao.utils import unwrap_tensor_subclass
 
             quantize_(
                 self,
@@ -2694,7 +2691,6 @@ def apply_8da4w_quantization(self):
                     weight_dtype=torch.int4, granularity=PerGroup(self.group_size)
                 ),
             )
-            unwrap_tensor_subclass(self)
             return self
 
         # Test with GEMV pattern (batch_size=1, seq_len=1)
diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py
index dc92a9542a9..14a3460e1f7 100644
--- a/backends/xnnpack/test/ops/test_linear.py
+++ b/backends/xnnpack/test/ops/test_linear.py
@@ -39,7 +39,6 @@
         Int8DynamicActivationIntxWeightConfig,
         quantize_,
     )
-    from torchao.utils import unwrap_tensor_subclass
 
     torchao_installed = True
 except:
@@ -400,7 +399,6 @@ def _test_groupwise_dq_linear(
                 weight_granularity=PerGroup(group_size),
             ),
         )
-        unwrap_tensor_subclass(mod)
         DynamicallyQuantizedPartitioner = XnnpackPartitioner(
             config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
             per_op_mode=True,
diff --git a/examples/apple/coreml/llama/export.py b/examples/apple/coreml/llama/export.py
index af2fa3c74ee..84460a9bf54 100644
--- a/examples/apple/coreml/llama/export.py
+++ b/examples/apple/coreml/llama/export.py
@@ -28,7 +28,6 @@
 
 from torchao.quantization.granularity import PerAxis, PerGroup
 from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-from torchao.utils import unwrap_tensor_subclass
 
 
 def main() -> None:
@@ -193,8 +192,6 @@ def main() -> None:
     )
     example_inputs = input_manager.get_inputs(tokens=[0])
 
-    model = unwrap_tensor_subclass(model)
-
     ep = torch.export.export(model, example_inputs, strict=True)
     print("Exported program")
     print(ep)
diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py
index 9e49f9e4e15..8c0e63953e9 100644
--- a/examples/models/llama/source_transformation/quantize.py
+++ b/examples/models/llama/source_transformation/quantize.py
@@ -122,7 +122,6 @@ def quantize(  # noqa C901
         Int8DynamicActivationIntxWeightConfig,
         quantize_,
     )
-    from torchao.utils import unwrap_tensor_subclass
 
     with torch.no_grad():
         # Computation dtype is fixed to fp32 in the implementation of quantize_, so
@@ -142,7 +141,6 @@ def quantize(  # noqa C901
                 ),
             ),
         )
-        model = unwrap_tensor_subclass(model)
         if verbose:
             print("quantized model:", model)
         return model
@@ -156,7 +154,6 @@ def quantize(  # noqa C901
             quantize_,
         )
         from torchao.quantization.granularity import PerGroup
-        from torchao.utils import unwrap_tensor_subclass
 
         def filter_fn(m, fqn):
             is_linear = isinstance(m, nn.Linear)
@@ -181,8 +178,6 @@ def filter_fn(m, fqn):
             filter_fn=filter_fn,
         )
 
-        model = unwrap_tensor_subclass(model)
-
         # TODO: deal with checkpoint / computation dtype decoupling.
 
         if verbose:
@@ -191,7 +186,6 @@ def filter_fn(m, fqn):
     elif qmode == "4w":
         from torchao.quantization.granularity import PerGroup
         from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-        from torchao.utils import unwrap_tensor_subclass
 
         q_group_size = 256 if group_size is None else group_size
         q_config = IntxWeightOnlyConfig(
@@ -204,7 +198,6 @@ def filter_fn(m, fqn):
             ),
         )
         quantize_(model, q_config)
-        model = unwrap_tensor_subclass(model)
 
         return model
     else:
diff --git a/export/stages.py b/export/stages.py
index 3be801c6a14..3499b5eb655 100644
--- a/export/stages.py
+++ b/export/stages.py
@@ -26,7 +26,6 @@
     ComposableQuantizer,
     Quantizer as TorchAOPT2EQuantizer,
 )
-from torchao.utils import unwrap_tensor_subclass
 
 
 class PipelineArtifact:
@@ -344,7 +343,6 @@ def run(self, artifact: PipelineArtifact) -> None:
 
             ao_config = self._quantization_recipe.ao_quantization_configs[0]
             quantize_(model, ao_config.ao_base_config, ao_config.filter_fn)
-            unwrap_tensor_subclass(model)
 
         self._artifact = artifact.copy_with_new_data(self._transformed_models)
 
diff --git a/export/tests/test_export_stages.py b/export/tests/test_export_stages.py
index 4e8144bd487..02a21788535 100644
--- a/export/tests/test_export_stages.py
+++ b/export/tests/test_export_stages.py
@@ -280,10 +280,7 @@ def test_source_transform_stage_no_quantization(self) -> None:
         self.assertEqual(result_artifact.data, self.models_dict)
 
     @patch("executorch.export.stages.quantize_")
-    @patch("executorch.export.stages.unwrap_tensor_subclass")
-    def test_run_with_ao_quantization_configs(
-        self, mock_unwrap: Mock, mock_quantize: Mock
-    ) -> None:
+    def test_run_with_ao_quantization_configs(self, mock_quantize: Mock) -> None:
         from torchao.core.config import AOBaseConfig
 
         mock_config = Mock(spec=AOBaseConfig)
@@ -308,9 +305,6 @@ def test_run_with_ao_quantization_configs(
         self.assertEqual(call_args[1], mock_config)
         self.assertEqual(call_args[2], mock_filter_fn)
 
-        # Verify unwrap_tensor_subclass was called once (with the copied model)
-        self.assertEqual(mock_unwrap.call_count, 1)
-
         # Verify that the original models_dict is unchanged
         self.assertEqual(models_dict, {"forward": self.model})
 
diff --git a/extension/llm/export/builder.py b/extension/llm/export/builder.py
index f8c556f351c..b829239d1e2 100644
--- a/extension/llm/export/builder.py
+++ b/extension/llm/export/builder.py
@@ -38,7 +38,6 @@
 from torch.nn.attention import SDPBackend
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torchao.quantization.pt2e.quantizer import ComposableQuantizer, Quantizer
-from torchao.utils import unwrap_tensor_subclass
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
 logging.basicConfig(level=logging.INFO, format=FORMAT)
@@ -203,11 +202,6 @@ def _get_edge_config(self) -> EdgeCompileConfig:
         return edge_config
 
     def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
-        if module is not None:
-            unwrap_tensor_subclass(module)
-        else:
-            unwrap_tensor_subclass(self.model)
-
         dynamic_shape = self._get_dynamic_shape()
         # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)
diff --git a/torch_pin.py b/torch_pin.py
index 5e54c848d13..7d20cbcf074 100644
--- a/torch_pin.py
+++ b/torch_pin.py
@@ -1,2 +1,2 @@
 TORCH_VERSION = "2.10.0"
-NIGHTLY_VERSION = "dev20251015"
+NIGHTLY_VERSION = "dev20251104"
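-- 
Note on the pattern (trailing comment, ignored when the patch is applied):
every hunk above removes the same post-quantization round-trip. With the
newer PyTorch pin bumped in this patch, torch.export is expected to trace
torchao's quantized tensor-subclass weights directly, so the
unwrap_tensor_subclass() step between quantize_() and export is dropped.
A minimal before/after sketch; the toy module, input shape, and group size
are illustrative, not from this patch:

    import torch
    from torchao.quantization.granularity import PerGroup
    from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_

    # Toy stand-in for the models quantized in the hunks above.
    model = torch.nn.Sequential(torch.nn.Linear(256, 256))

    # Same config style as the 4-bit weight-only hunks in this patch.
    quantize_(
        model,
        IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)),
    )

    # Previously required between quantize_() and export, now removed:
    #   from torchao.utils import unwrap_tensor_subclass
    #   model = unwrap_tensor_subclass(model)

    # Export consumes the quantized module directly (assumes a PyTorch
    # build at least as new as the nightly pin above).
    ep = torch.export.export(model, (torch.randn(1, 256),), strict=True)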