2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-e6f766c7d750d40603eee3f66c5915bac606b3ea
+72df1db744431d24ee1a4c0e42e514426ce0d45f

4 changes: 0 additions & 4 deletions backends/test/harness/stages/quantize.py
@@ -16,7 +16,6 @@
 )
 from torchao.quantization.pt2e.quantizer import Quantizer
 from torchao.quantization.quant_api import quantize_
-from torchao.utils import unwrap_tensor_subclass


 class Quantize(Stage):
@@ -111,9 +110,6 @@ def run(
         # Apply quantize_ to the model
         quantize_(artifact, self.config, self.filter_fn)

-        # Unwrap tensor subclasses for export compatibility
-        unwrap_tensor_subclass(artifact)
-
         self.quantized_module = artifact

     @property

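The deleted lines show the pattern this PR removes everywhere: quantize_ mutates the module in place, and the explicit unwrap_tensor_subclass pass is no longer needed before export. A minimal self-contained sketch of the new flow, assuming a torchao/PyTorch pair whose quantized tensor subclasses trace through torch.export directly (the toy model and config values are illustrative, not from this PR):

    import torch
    from torchao.quantization.granularity import PerGroup
    from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_

    model = torch.nn.Sequential(torch.nn.Linear(64, 64)).eval()
    # quantize_ swaps Linear weights for quantized tensor subclasses in place.
    quantize_(model, IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32)))
    # No unwrap_tensor_subclass step: export the quantized module as-is.
    ep = torch.export.export(model, (torch.randn(1, 64),), strict=True)
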
4 changes: 0 additions & 4 deletions backends/vulkan/test/test_vulkan_delegate.py
@@ -43,7 +43,6 @@

 from torchao.quantization.pt2e.quantizer import Quantizer
 from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-from torchao.utils import unwrap_tensor_subclass

 try:
     ctypes.CDLL("libvulkan.so.1")
@@ -2363,7 +2362,6 @@ def apply_quantization(self):
                     granularity=self.quant_granularity,
                 )
                 quantize_(self, q_config)
-                unwrap_tensor_subclass(self)
                 return self

         # Test with GEMV pattern (batch_size=1, seq_len=1)
@@ -2686,15 +2684,13 @@ def apply_8da4w_quantization(self):
                     quantize_,
                 )
                 from torchao.quantization.granularity import PerGroup
-                from torchao.utils import unwrap_tensor_subclass

                 quantize_(
                     self,
                     Int8DynamicActivationIntxWeightConfig(
                         weight_dtype=torch.int4, granularity=PerGroup(self.group_size)
                     ),
                 )
-                unwrap_tensor_subclass(self)
                 return self

         # Test with GEMV pattern (batch_size=1, seq_len=1)

2 changes: 0 additions & 2 deletions backends/xnnpack/test/ops/test_linear.py
@@ -39,7 +39,6 @@
         Int8DynamicActivationIntxWeightConfig,
         quantize_,
     )
-    from torchao.utils import unwrap_tensor_subclass

     torchao_installed = True
 except:
@@ -400,7 +399,6 @@ def _test_groupwise_dq_linear(
                 weight_granularity=PerGroup(group_size),
             ),
         )
-        unwrap_tensor_subclass(mod)
         DynamicallyQuantizedPartitioner = XnnpackPartitioner(
             config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
             per_op_mode=True,

3 changes: 0 additions & 3 deletions examples/apple/coreml/llama/export.py
@@ -28,7 +28,6 @@

 from torchao.quantization.granularity import PerAxis, PerGroup
 from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-from torchao.utils import unwrap_tensor_subclass


 def main() -> None:
@@ -193,8 +192,6 @@ def main() -> None:
     )
     example_inputs = input_manager.get_inputs(tokens=[0])

-    model = unwrap_tensor_subclass(model)
-
     ep = torch.export.export(model, example_inputs, strict=True)
     print("Exported program")
     print(ep)

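For reference, a hedged sketch of how the two granularities imported above are typically passed to IntxWeightOnlyConfig; the dtype and group size are illustrative, not taken from this export script:

    import torch
    from torchao.quantization.granularity import PerAxis, PerGroup
    from torchao.quantization.quant_api import IntxWeightOnlyConfig

    # Per-channel (axis 0) weight-only quantization.
    per_channel_config = IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerAxis(0))
    # Group-wise quantization with 32 elements per group.
    per_group_config = IntxWeightOnlyConfig(weight_dtype=torch.int4, granularity=PerGroup(32))
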
7 changes: 0 additions & 7 deletions examples/models/llama/source_transformation/quantize.py
@@ -122,7 +122,6 @@ def quantize( # noqa C901
             Int8DynamicActivationIntxWeightConfig,
             quantize_,
         )
-        from torchao.utils import unwrap_tensor_subclass

         with torch.no_grad():
             # Computation dtype is fixed to fp32 in the implementation of quantize_, so
@@ -142,7 +141,6 @@ def quantize( # noqa C901
                     ),
                 ),
             )
-            model = unwrap_tensor_subclass(model)
             if verbose:
                 print("quantized model:", model)
             return model
@@ -156,7 +154,6 @@ def quantize( # noqa C901
             quantize_,
         )
         from torchao.quantization.granularity import PerGroup
-        from torchao.utils import unwrap_tensor_subclass

         def filter_fn(m, fqn):
             is_linear = isinstance(m, nn.Linear)
@@ -181,8 +178,6 @@ def filter_fn(m, fqn):
             filter_fn=filter_fn,
         )

-        model = unwrap_tensor_subclass(model)
-
         # TODO: deal with checkpoint / computation dtype decoupling.

         if verbose:
@@ -191,7 +186,6 @@ def filter_fn(m, fqn):
     elif qmode == "4w":
         from torchao.quantization.granularity import PerGroup
         from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_
-        from torchao.utils import unwrap_tensor_subclass

         q_group_size = 256 if group_size is None else group_size
         q_config = IntxWeightOnlyConfig(
@@ -204,7 +198,6 @@ def filter_fn(m, fqn):
             ),
         )
         quantize_(model, q_config)
-        model = unwrap_tensor_subclass(model)

         return model
     else:

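The hunks above leave the filter_fn mechanism intact: quantize_ only rewrites modules for which the callable returns True. A small self-contained sketch of that pattern, with an illustrative size threshold rather than this file's actual predicate:

    import torch
    import torch.nn as nn
    from torchao.quantization.granularity import PerGroup
    from torchao.quantization.quant_api import Int8DynamicActivationIntxWeightConfig, quantize_

    model = nn.Sequential(nn.Linear(512, 512), nn.Linear(8, 8))

    def filter_fn(m: nn.Module, fqn: str) -> bool:
        # Quantize only linears large enough to benefit (threshold is illustrative).
        return isinstance(m, nn.Linear) and m.in_features >= 256

    quantize_(
        model,
        Int8DynamicActivationIntxWeightConfig(weight_dtype=torch.int4, granularity=PerGroup(256)),
        filter_fn=filter_fn,
    )
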
2 changes: 0 additions & 2 deletions export/stages.py
@@ -26,7 +26,6 @@
     ComposableQuantizer,
     Quantizer as TorchAOPT2EQuantizer,
 )
-from torchao.utils import unwrap_tensor_subclass


 class PipelineArtifact:
@@ -344,7 +343,6 @@ def run(self, artifact: PipelineArtifact) -> None:

         ao_config = self._quantization_recipe.ao_quantization_configs[0]
         quantize_(model, ao_config.ao_base_config, ao_config.filter_fn)
-        unwrap_tensor_subclass(model)

         self._artifact = artifact.copy_with_new_data(self._transformed_models)

8 changes: 1 addition & 7 deletions export/tests/test_export_stages.py
@@ -280,10 +280,7 @@ def test_source_transform_stage_no_quantization(self) -> None:
         self.assertEqual(result_artifact.data, self.models_dict)

     @patch("executorch.export.stages.quantize_")
-    @patch("executorch.export.stages.unwrap_tensor_subclass")
-    def test_run_with_ao_quantization_configs(
-        self, mock_unwrap: Mock, mock_quantize: Mock
-    ) -> None:
+    def test_run_with_ao_quantization_configs(self, mock_quantize: Mock) -> None:
         from torchao.core.config import AOBaseConfig

         mock_config = Mock(spec=AOBaseConfig)
@@ -308,9 +305,6 @@ def test_run_with_ao_quantization_configs(
         self.assertEqual(call_args[1], mock_config)
         self.assertEqual(call_args[2], mock_filter_fn)

-        # Verify unwrap_tensor_subclass was called once (with the copied model)
-        self.assertEqual(mock_unwrap.call_count, 1)
-
         # Verify that the original models_dict is unchanged
         self.assertEqual(models_dict, {"forward": self.model})

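Context for the signature change: unittest.mock injects stacked @patch mocks bottom-up, so deleting the inner unwrap_tensor_subclass patch also deletes the first mock parameter. A standalone illustration (the patch targets are arbitrary stdlib functions, not the repo's):

    import os
    import unittest
    from unittest.mock import Mock, patch

    class PatchOrderExample(unittest.TestCase):
        # Stacked @patch decorators inject mocks bottom-up: the decorator
        # closest to the function supplies the first mock argument.
        @patch("os.getcwd")
        @patch("os.cpu_count")
        def test_order(self, mock_cpu: Mock, mock_getcwd: Mock) -> None:
            os.cpu_count()
            mock_cpu.assert_called_once()
            mock_getcwd.assert_not_called()
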
6 changes: 0 additions & 6 deletions extension/llm/export/builder.py
@@ -38,7 +38,6 @@
 from torch.nn.attention import SDPBackend
 from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
 from torchao.quantization.pt2e.quantizer import ComposableQuantizer, Quantizer
-from torchao.utils import unwrap_tensor_subclass

 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
 logging.basicConfig(level=logging.INFO, format=FORMAT)
@@ -203,11 +202,6 @@ def _get_edge_config(self) -> EdgeCompileConfig:
         return edge_config

     def _export(self, module: Optional[torch.nn.Module] = None) -> ExportedProgram:
-        if module is not None:
-            unwrap_tensor_subclass(module)
-        else:
-            unwrap_tensor_subclass(self.model)
-
         dynamic_shape = self._get_dynamic_shape()
         # 1. torch.nn.attention.sdpa_kernel([SDPBackend.MATH]) is for bypassing the dynamo error when tracing
         # 2. torch.no_grad() is for getting rid of the dropout (not sure why training ops will show up)

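With the unwrap branch gone, _export traces the module exactly as quantize_ left it. A standalone sketch of the export context the remaining comments describe, using a toy module; the real builder also threads through dynamic shapes and its own example inputs:

    import torch
    from torch.nn.attention import SDPBackend, sdpa_kernel

    module = torch.nn.Linear(8, 8).eval()
    example_inputs = (torch.randn(2, 8),)
    # MATH backend sidesteps dynamo issues when tracing SDPA; no_grad drops
    # training-only ops such as dropout.
    with sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
        ep = torch.export.export(module, example_inputs, strict=True)
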
2 changes: 1 addition & 1 deletion torch_pin.py
@@ -1,2 +1,2 @@
 TORCH_VERSION = "2.10.0"
-NIGHTLY_VERSION = "dev20251015"
+NIGHTLY_VERSION = "dev20251104"