From 6aa17368e7821a64253c360419bf4804bdccf594 Mon Sep 17 00:00:00 2001
From: DN6
Date: Mon, 3 Nov 2025 15:11:45 +0530
Subject: [PATCH 1/2] Refactor modular pipeline tests into pytest-style mixins

Drop the unittest.TestCase base class, rename the shared SDXL helpers to
*TesterMixin, move per-pipeline configuration (pipeline_class, blocks
class, repo, params) into the concrete test classes, and make
get_dummy_inputs device-independent by always seeding through
get_generator.
---
 ...st_modular_pipeline_stable_diffusion_xl.py | 190 ++++++++++--------
 .../test_modular_pipelines_common.py          |  49 ++---
 2 files changed, 123 insertions(+), 116 deletions(-)

diff --git a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py
index 22347aa5589c..f8d8185a8865 100644
--- a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py
+++ b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 
 import random
-import unittest
 from typing import Any, Dict
 
 import numpy as np
@@ -32,37 +31,13 @@
 enable_full_determinism()
 
 
-class SDXLModularTests:
+class SDXLModularTesterMixin:
     """
     This mixin defines method to create pipeline, base input and base test across all SDXL modular tests.
     """
 
-    pipeline_class = StableDiffusionXLModularPipeline
-    pipeline_blocks_class = StableDiffusionXLAutoBlocks
-    repo = "hf-internal-testing/tiny-sdxl-modular"
-    params = frozenset(
-        [
-            "prompt",
-            "height",
-            "width",
-            "negative_prompt",
-            "cross_attention_kwargs",
-            "image",
-            "mask_image",
-        ]
-    )
-    batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
-
-    def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
-        pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager)
-        pipeline.load_components(torch_dtype=torch_dtype)
-        return pipeline
-
-    def get_dummy_inputs(self, device, seed=0):
-        if str(device).startswith("mps"):
-            generator = torch.manual_seed(seed)
-        else:
-            generator = torch.Generator(device=device).manual_seed(seed)
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
         inputs = {
             "prompt": "A painting of a squirrel eating a burger",
             "generator": generator,
@@ -72,23 +47,22 @@ def get_dummy_inputs(self, device, seed=0):
         return inputs
 
     def _test_stable_diffusion_xl_euler(self, expected_image_shape, expected_slice, expected_max_diff=1e-2):
-        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
         sd_pipe = self.get_pipeline()
-        sd_pipe = sd_pipe.to(device)
+        sd_pipe = sd_pipe.to(torch_device)
         sd_pipe.set_progress_bar_config(disable=None)
 
-        inputs = self.get_dummy_inputs(device)
+        inputs = self.get_dummy_inputs()
         image = sd_pipe(**inputs, output="images")
         image_slice = image[0, -3:, -3:, -1]
 
         assert image.shape == expected_image_shape
 
-        assert np.abs(image_slice.flatten() - expected_slice).max() < expected_max_diff, (
-            "Image Slice does not match expected slice"
-        )
+        assert (
+            torch.abs(image_slice.flatten() - expected_slice).max() < expected_max_diff
+        ), "Image Slice does not match expected slice"
 
 
-class SDXLModularIPAdapterTests:
+class SDXLModularIPAdapterTesterMixin:
    """
    This mixin is designed to test IP Adapter. 
""" @@ -98,16 +72,16 @@ def test_pipeline_inputs_and_blocks(self): parameters = blocks.input_names assert issubclass(self.pipeline_class, ModularIPAdapterMixin) - assert "ip_adapter_image" in parameters, ( - "`ip_adapter_image` argument must be supported by the `__call__` method" - ) + assert ( + "ip_adapter_image" in parameters + ), "`ip_adapter_image` argument must be supported by the `__call__` method" assert "ip_adapter" in blocks.sub_blocks, "pipeline must contain an IPAdapter block" _ = blocks.sub_blocks.pop("ip_adapter") parameters = blocks.input_names - assert "ip_adapter_image" not in parameters, ( - "`ip_adapter_image` argument must be removed from the `__call__` method" - ) + assert ( + "ip_adapter_image" not in parameters + ), "`ip_adapter_image` argument must be removed from the `__call__` method" def _get_dummy_image_embeds(self, cross_attention_dim: int = 32): return torch.randn((1, 1, cross_attention_dim), device=torch_device) @@ -152,7 +126,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N cross_attention_dim = pipe.unet.config.get("cross_attention_dim") # forward pass without ip adapter - inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) if expected_pipe_slice is None: output_without_adapter = pipe(**inputs, output="images") else: @@ -163,7 +137,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N pipe.unet._load_ip_adapter_weights(adapter_state_dict) # forward pass with single ip adapter, but scale=0 which should have no effect - inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] pipe.set_ip_adapter_scale(0.0) @@ -172,7 +146,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N output_without_adapter_scale = output_without_adapter_scale[0, -3:, -3:, -1].flatten() # forward pass with single ip adapter, but with scale of adapter weights - inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] pipe.set_ip_adapter_scale(42.0) @@ -183,9 +157,9 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N max_diff_without_adapter_scale = np.abs(output_without_adapter_scale - output_without_adapter).max() max_diff_with_adapter_scale = np.abs(output_with_adapter_scale - output_without_adapter).max() - assert max_diff_without_adapter_scale < expected_max_diff, ( - "Output without ip-adapter must be same as normal inference" - ) + assert ( + max_diff_without_adapter_scale < expected_max_diff + ), "Output without ip-adapter must be same as normal inference" assert max_diff_with_adapter_scale > 1e-2, "Output with ip-adapter must be different from normal inference" # 2. 
Multi IP-Adapter test cases @@ -194,7 +168,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N pipe.unet._load_ip_adapter_weights([adapter_state_dict_1, adapter_state_dict_2]) # forward pass with multi ip adapter, but scale=0 which should have no effect - inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2 inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2 pipe.set_ip_adapter_scale([0.0, 0.0]) @@ -203,7 +177,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N output_without_multi_adapter_scale = output_without_multi_adapter_scale[0, -3:, -3:, -1].flatten() # forward pass with multi ip adapter, but with scale of adapter weights - inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2 inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2 pipe.set_ip_adapter_scale([42.0, 42.0]) @@ -215,15 +189,15 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N output_without_multi_adapter_scale - output_without_adapter ).max() max_diff_with_multi_adapter_scale = np.abs(output_with_multi_adapter_scale - output_without_adapter).max() - assert max_diff_without_multi_adapter_scale < expected_max_diff, ( - "Output without multi-ip-adapter must be same as normal inference" - ) - assert max_diff_with_multi_adapter_scale > 1e-2, ( - "Output with multi-ip-adapter scale must be different from normal inference" - ) + assert ( + max_diff_without_multi_adapter_scale < expected_max_diff + ), "Output without multi-ip-adapter must be same as normal inference" + assert ( + max_diff_with_multi_adapter_scale > 1e-2 + ), "Output with multi-ip-adapter scale must be different from normal inference" -class SDXLModularControlNetTests: +class SDXLModularControlNetTesterMixin: """ This mixin is designed to test ControlNet. 
""" @@ -233,9 +207,9 @@ def test_pipeline_inputs(self): parameters = blocks.input_names assert "control_image" in parameters, "`control_image` argument must be supported by the `__call__` method" - assert "controlnet_conditioning_scale" in parameters, ( - "`controlnet_conditioning_scale` argument must be supported by the `__call__` method" - ) + assert ( + "controlnet_conditioning_scale" in parameters + ), "`controlnet_conditioning_scale` argument must be supported by the `__call__` method" def _modify_inputs_for_controlnet_test(self, inputs: Dict[str, Any]): controlnet_embedder_scale_factor = 2 @@ -262,18 +236,18 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N pipe.set_progress_bar_config(disable=None) # forward pass without controlnet - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() output_without_controlnet = pipe(**inputs, output="images") output_without_controlnet = output_without_controlnet[0, -3:, -3:, -1].flatten() # forward pass with single controlnet, but scale=0 which should have no effect - inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs()) inputs["controlnet_conditioning_scale"] = 0.0 output_without_controlnet_scale = pipe(**inputs, output="images") output_without_controlnet_scale = output_without_controlnet_scale[0, -3:, -3:, -1].flatten() # forward pass with single controlnet, but with scale of adapter weights - inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs()) inputs["controlnet_conditioning_scale"] = 42.0 output_with_controlnet_scale = pipe(**inputs, output="images") output_with_controlnet_scale = output_with_controlnet_scale[0, -3:, -3:, -1].flatten() @@ -281,9 +255,9 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N max_diff_without_controlnet_scale = np.abs(output_without_controlnet_scale - output_without_controlnet).max() max_diff_with_controlnet_scale = np.abs(output_with_controlnet_scale - output_without_controlnet).max() - assert max_diff_without_controlnet_scale < expected_max_diff, ( - "Output without controlnet must be same as normal inference" - ) + assert ( + max_diff_without_controlnet_scale < expected_max_diff + ), "Output without controlnet must be same as normal inference" assert max_diff_with_controlnet_scale > 1e-2, "Output with controlnet must be different from normal inference" def test_controlnet_cfg(self): @@ -295,13 +269,13 @@ def test_controlnet_cfg(self): guider = ClassifierFreeGuidance(guidance_scale=1.0) pipe.update_components(guider=guider) - inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs()) out_no_cfg = pipe(**inputs, output="images") # forward pass with CFG applied guider = ClassifierFreeGuidance(guidance_scale=7.5) pipe.update_components(guider=guider) - inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs()) out_cfg = pipe(**inputs, output="images") assert out_cfg.shape == out_no_cfg.shape @@ -309,7 +283,7 @@ def test_controlnet_cfg(self): assert max_diff > 1e-2, "Output with CFG must be different from normal inference" -class SDXLModularGuiderTests: +class SDXLModularGuiderTesterMixin: def 
test_guider_cfg(self): pipe = self.get_pipeline() pipe = pipe.to(torch_device) @@ -319,13 +293,13 @@ def test_guider_cfg(self): guider = ClassifierFreeGuidance(guidance_scale=1.0) pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() out_no_cfg = pipe(**inputs, output="images") # forward pass with CFG applied guider = ClassifierFreeGuidance(guidance_scale=7.5) pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() out_cfg = pipe(**inputs, output="images") assert out_cfg.shape == out_no_cfg.shape @@ -333,16 +307,29 @@ def test_guider_cfg(self): assert max_diff > 1e-2, "Output with CFG must be different from normal inference" -class SDXLModularPipelineFastTests( - SDXLModularTests, - SDXLModularIPAdapterTests, - SDXLModularControlNetTests, - SDXLModularGuiderTests, +class TestSDXLModularPipelineFast( + SDXLModularTesterMixin, + SDXLModularIPAdapterTesterMixin, + SDXLModularControlNetTesterMixin, + SDXLModularGuiderTesterMixin, ModularPipelineTesterMixin, - unittest.TestCase, ): """Test cases for Stable Diffusion XL modular pipeline fast tests.""" + pipeline_class = StableDiffusionXLModularPipeline + pipeline_blocks_class = StableDiffusionXLAutoBlocks + repo = "hf-internal-testing/tiny-sdxl-modular" + params = frozenset( + [ + "prompt", + "height", + "width", + "negative_prompt", + "cross_attention_kwargs", + ] + ) + batch_params = frozenset(["prompt", "negative_prompt"]) + def test_stable_diffusion_xl_euler(self): self._test_stable_diffusion_xl_euler( expected_image_shape=(1, 64, 64, 3), @@ -364,16 +351,30 @@ def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) -class SDXLImg2ImgModularPipelineFastTests( - SDXLModularTests, - SDXLModularIPAdapterTests, - SDXLModularControlNetTests, - SDXLModularGuiderTests, +class TestSDXLImg2ImgModularPipelineFast( ModularPipelineTesterMixin, - unittest.TestCase, + SDXLModularTesterMixin, + SDXLModularIPAdapterTesterMixin, + SDXLModularControlNetTesterMixin, + SDXLModularGuiderTesterMixin, ): """Test cases for Stable Diffusion XL image-to-image modular pipeline fast tests.""" + pipeline_class = StableDiffusionXLModularPipeline + pipeline_blocks_class = StableDiffusionXLAutoBlocks + repo = "hf-internal-testing/tiny-sdxl-modular" + params = frozenset( + [ + "prompt", + "height", + "width", + "negative_prompt", + "cross_attention_kwargs", + "image", + ] + ) + batch_params = frozenset(["prompt", "negative_prompt", "image"]) + def get_dummy_inputs(self, device, seed=0): inputs = super().get_dummy_inputs(device, seed) image = floats_tensor((1, 3, 64, 64), rng=random.Random(seed)).to(device) @@ -405,15 +406,30 @@ def test_inference_batch_single_identical(self): class SDXLInpaintingModularPipelineFastTests( - SDXLModularTests, - SDXLModularIPAdapterTests, - SDXLModularControlNetTests, - SDXLModularGuiderTests, ModularPipelineTesterMixin, - unittest.TestCase, + SDXLModularTesterMixin, + SDXLModularIPAdapterTesterMixin, + SDXLModularControlNetTesterMixin, + SDXLModularGuiderTesterMixin, ): """Test cases for Stable Diffusion XL inpainting modular pipeline fast tests.""" + pipeline_class = StableDiffusionXLModularPipeline + pipeline_blocks_class = StableDiffusionXLAutoBlocks + repo = "hf-internal-testing/tiny-sdxl-modular" + params = frozenset( + [ + "prompt", + "height", + "width", + "negative_prompt", + "cross_attention_kwargs", + "image", + "mask_image", + ] + ) + 
batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) + def get_dummy_inputs(self, device, seed=0): inputs = super().get_dummy_inputs(device, seed) image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index d309fcf35339..2e7481a8c8bc 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -1,9 +1,7 @@ import gc import tempfile -import unittest from typing import Callable, Union -import numpy as np import torch import diffusers @@ -19,17 +17,9 @@ ) -def to_np(tensor): - if isinstance(tensor, torch.Tensor): - tensor = tensor.detach().cpu().numpy() - - return tensor - - @require_torch class ModularPipelineTesterMixin: """ - This mixin is designed to be used with unittest.TestCase classes. It provides a set of common tests for each modular pipeline, including: - test_pipeline_call_signature: check if the pipeline's __call__ method has all required parameters @@ -57,9 +47,8 @@ class ModularPipelineTesterMixin: ] ) - def get_generator(self, seed): - device = torch_device if torch_device != "mps" else "cpu" - generator = torch.Generator(device).manual_seed(seed) + def get_generator(self, seed=0): + generator = torch.Generator("cpu").manual_seed(seed) return generator @property @@ -88,7 +77,7 @@ def get_pipeline(self): "See existing pipeline tests for reference." ) - def get_dummy_inputs(self, device, seed=0): + def get_dummy_inputs(self, seed=0): raise NotImplementedError( "You need to implement `get_dummy_inputs(self, device, seed)` in the child test class. " "See existing pipeline tests for reference." @@ -123,20 +112,23 @@ def batch_params(self) -> frozenset: "See existing pipeline tests for reference." 
) - def setUp(self): + def setup_method(self): # clean up the VRAM before each test - super().setUp() torch.compiler.reset() gc.collect() backend_empty_cache(torch_device) - def tearDown(self): + def teardown_method(self): # clean up the VRAM after each test in case of CUDA runtime errors - super().tearDown() torch.compiler.reset() gc.collect() backend_empty_cache(torch_device) + def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): + pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) + pipeline.load_components(self.repo, torch_dtype=torch_dtype) + return pipeline + def test_pipeline_call_signature(self): pipe = self.get_pipeline() input_parameters = pipe.blocks.input_names @@ -144,9 +136,9 @@ def test_pipeline_call_signature(self): def _check_for_parameters(parameters, expected_parameters, param_type): remaining_parameters = {param for param in parameters if param not in expected_parameters} - assert len(remaining_parameters) == 0, ( - f"Required {param_type} parameters not present: {remaining_parameters}" - ) + assert ( + len(remaining_parameters) == 0 + ), f"Required {param_type} parameters not present: {remaining_parameters}" _check_for_parameters(self.params, input_parameters, "input") _check_for_parameters(self.optional_params, optional_parameters, "optional") @@ -229,7 +221,6 @@ def test_inference_batch_single_identical( max_diff = np.abs(to_np(output_batch[0]) - to_np(output[0])).max() assert max_diff < expected_max_diff, "Batch inference results different from single inference results" - @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") @require_accelerator def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() @@ -274,9 +265,9 @@ def test_to_device(self): model_devices = [ component.device.type for component in pipe.components.values() if hasattr(component, "device") ] - assert all(device == torch_device for device in model_devices), ( - "All pipeline components are not on accelerator device" - ) + assert all( + device == torch_device for device in model_devices + ), "All pipeline components are not on accelerator device" def test_inference_is_not_nan_cpu(self): pipe = self.get_pipeline() @@ -284,7 +275,7 @@ def test_inference_is_not_nan_cpu(self): pipe.to("cpu") output = pipe(**self.get_dummy_inputs("cpu"), output="images") - assert np.isnan(to_np(output)).sum() == 0, "CPU Inference returns NaN" + assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN" @require_accelerator def test_inference_is_not_nan(self): @@ -293,7 +284,7 @@ def test_inference_is_not_nan(self): pipe.to(torch_device) output = pipe(**self.get_dummy_inputs(torch_device), output="images") - assert np.isnan(to_np(output)).sum() == 0, "Accelerator Inference returns NaN" + assert torch.isnan(output).sum() == 0, "Accelerator Inference returns NaN" def test_num_images_per_prompt(self): pipe = self.get_pipeline() @@ -334,7 +325,7 @@ def test_components_auto_cpu_offload_inference_consistent(self): image_slices.append(image[0, -3:, -3:, -1].flatten()) - assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3 def test_save_from_pretrained(self): pipes = [] @@ -356,4 +347,4 @@ def test_save_from_pretrained(self): image_slices.append(image[0, -3:, -3:, -1].flatten()) - assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3 
From 70858dfb5363d85c87634c806e40595c67d85cf3 Mon Sep 17 00:00:00 2001
From: DN6
Date: Mon, 3 Nov 2025 18:48:10 +0530
Subject: [PATCH 2/2] Port Flux modular tests to the new mixins; fix loading

Apply the same pytest-style structure to the Flux and Flux Kontext
tests, switch dummy inputs to "pt" outputs so comparisons can use torch
ops directly, and fix ModularPipelineTesterMixin.get_pipeline so that
load_components is called without re-passing the repo id.
---
 .../flux/test_modular_pipeline_flux.py        | 110 +++++++---
 ...st_modular_pipeline_stable_diffusion_xl.py | 203 ++++++++++--------
 .../test_modular_pipelines_common.py          |  40 ++--
 3 files changed, 208 insertions(+), 145 deletions(-)

diff --git a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
index 9d70c21aa8cd..a29fd436149d 100644
--- a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
+++ b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py
@@ -15,7 +15,6 @@
 
 import random
 import tempfile
-import unittest
 
 import numpy as np
 import PIL
@@ -34,21 +33,16 @@
 from ..test_modular_pipelines_common import ModularPipelineTesterMixin
 
 
-class FluxModularTests:
+class TestFluxModularPipelineFast(ModularPipelineTesterMixin):
     pipeline_class = FluxModularPipeline
     pipeline_blocks_class = FluxAutoBlocks
     repo = "hf-internal-testing/tiny-flux-modular"
 
-    def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
-        pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager)
-        pipeline.load_components(torch_dtype=torch_dtype)
-        return pipeline
+    params = frozenset(["prompt", "height", "width", "guidance_scale"])
+    batch_params = frozenset(["prompt"])
 
-    def get_dummy_inputs(self, device, seed=0):
-        if str(device).startswith("mps"):
-            generator = torch.manual_seed(seed)
-        else:
-            generator = torch.Generator(device=device).manual_seed(seed)
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
         inputs = {
             "prompt": "A painting of a squirrel eating a burger",
             "generator": generator,
@@ -57,36 +51,47 @@ def get_dummy_inputs(self, device, seed=0):
             "height": 8,
             "width": 8,
             "max_sequence_length": 48,
-            "output_type": "np",
+            "output_type": "pt",
         }
         return inputs
 
 
-class FluxModularPipelineFastTests(FluxModularTests, ModularPipelineTesterMixin, unittest.TestCase):
-    params = frozenset(["prompt", "height", "width", "guidance_scale"])
-    batch_params = frozenset(["prompt"])
-
+class TestFluxImg2ImgModularPipelineFast(ModularPipelineTesterMixin):
+    pipeline_class = FluxModularPipeline
+    pipeline_blocks_class = FluxAutoBlocks
+    repo = "hf-internal-testing/tiny-flux-modular"
 
-class FluxImg2ImgModularPipelineFastTests(FluxModularTests, ModularPipelineTesterMixin, unittest.TestCase):
     params = frozenset(["prompt", "height", "width", "guidance_scale", "image"])
     batch_params = frozenset(["prompt", "image"])
 
     def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
         pipeline = super().get_pipeline(components_manager, torch_dtype)
+
         # Override `vae_scale_factor` here as currently, `image_processor` is initialized with
         # fixed constants instead of
         # https://github.com/huggingface/diffusers/blob/d54622c2679d700b425ad61abce9b80fc36212c0/src/diffusers/pipelines/flux/pipeline_flux_img2img.py#L230C9-L232C10
         pipeline.image_processor = VaeImageProcessor(vae_scale_factor=2)
         return pipeline
 
-    def get_dummy_inputs(self, device, seed=0):
-        inputs = super().get_dummy_inputs(device, seed)
-        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
-        image = image / 2 + 0.5
-        inputs["image"] = image
-        inputs["strength"] = 0.8
-        inputs["height"] = 8
-        inputs["width"] = 8
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a 
burger", + "generator": generator, + "num_inference_steps": 4, + "guidance_scale": 5.0, + "height": 8, + "width": 8, + "max_sequence_length": 48, + "output_type": "pt", + } + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(torch_device) + image = image.cpu().permute(0, 2, 3, 1)[0] + init_image = PIL.Image.fromarray(np.uint8(image)).convert("RGB") + + inputs["image"] = init_image + inputs["strength"] = 0.5 + return inputs def test_save_from_pretrained(self): @@ -96,6 +101,7 @@ def test_save_from_pretrained(self): with tempfile.TemporaryDirectory() as tmpdirname: base_pipe.save_pretrained(tmpdirname) + pipe = ModularPipeline.from_pretrained(tmpdirname).to(torch_device) pipe.load_components(torch_dtype=torch.float32) pipe.to(torch_device) @@ -105,26 +111,62 @@ def test_save_from_pretrained(self): image_slices = [] for pipe in pipes: - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() image = pipe(**inputs, output="images") image_slices.append(image[0, -3:, -3:, -1].flatten()) - assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3 -class FluxKontextModularPipelineFastTests(FluxImg2ImgModularPipelineFastTests): +class TestFluxKontextModularPipelineFast(ModularPipelineTesterMixin): pipeline_class = FluxKontextModularPipeline pipeline_blocks_class = FluxKontextAutoBlocks repo = "hf-internal-testing/tiny-flux-kontext-pipe" - def get_dummy_inputs(self, device, seed=0): - inputs = super().get_dummy_inputs(device, seed) + params = frozenset(["prompt", "height", "width", "guidance_scale", "image"]) + batch_params = frozenset(["prompt", "image"]) + + def get_dummy_inputs(self, seed=0): + generator = self.get_generator(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "height": 8, + "width": 8, + "max_sequence_length": 48, + "output_type": "pt", + } image = PIL.Image.new("RGB", (32, 32), 0) - _ = inputs.pop("strength") + inputs["image"] = image - inputs["height"] = 8 - inputs["width"] = 8 - inputs["max_area"] = 8 * 8 + inputs["max_area"] = inputs["height"] * inputs["width"] inputs["_auto_resize"] = False + return inputs + + def test_save_from_pretrained(self): + pipes = [] + base_pipe = self.get_pipeline().to(torch_device) + pipes.append(base_pipe) + + with tempfile.TemporaryDirectory() as tmpdirname: + base_pipe.save_pretrained(tmpdirname) + + pipe = ModularPipeline.from_pretrained(tmpdirname).to(torch_device) + pipe.load_components(torch_dtype=torch.float32) + pipe.to(torch_device) + pipe.image_processor = VaeImageProcessor(vae_scale_factor=2) + + pipes.append(pipe) + + image_slices = [] + for pipe in pipes: + inputs = self.get_dummy_inputs() + image = pipe(**inputs, output="images") + + image_slices.append(image[0, -3:, -3:, -1].flatten()) + + assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3 diff --git a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py index f8d8185a8865..ea54b2bdff47 100644 --- a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py +++ b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py @@ -36,16 +36,6 @@ class SDXLModularTesterMixin: This mixin defines method to create pipeline, base input and base test across all SDXL modular tests. 
""" - def get_dummy_inputs(self, seed=0): - generator = self.get_generator(seed) - inputs = { - "prompt": "A painting of a squirrel eating a burger", - "generator": generator, - "num_inference_steps": 2, - "output_type": "np", - } - return inputs - def _test_stable_diffusion_xl_euler(self, expected_image_shape, expected_slice, expected_max_diff=1e-2): sd_pipe = self.get_pipeline() sd_pipe = sd_pipe.to(torch_device) @@ -56,10 +46,8 @@ def _test_stable_diffusion_xl_euler(self, expected_image_shape, expected_slice, image_slice = image[0, -3:, -3:, -1] assert image.shape == expected_image_shape - - assert ( - torch.abs(image_slice.flatten() - expected_slice).max() < expected_max_diff - ), "Image Slice does not match expected slice" + max_diff = torch.abs(image_slice.flatten() - expected_slice).max() + assert max_diff < expected_max_diff, f"Image slice does not match expected slice. Max Difference: {max_diff}" class SDXLModularIPAdapterTesterMixin: @@ -72,16 +60,16 @@ def test_pipeline_inputs_and_blocks(self): parameters = blocks.input_names assert issubclass(self.pipeline_class, ModularIPAdapterMixin) - assert ( - "ip_adapter_image" in parameters - ), "`ip_adapter_image` argument must be supported by the `__call__` method" + assert "ip_adapter_image" in parameters, ( + "`ip_adapter_image` argument must be supported by the `__call__` method" + ) assert "ip_adapter" in blocks.sub_blocks, "pipeline must contain an IPAdapter block" _ = blocks.sub_blocks.pop("ip_adapter") parameters = blocks.input_names - assert ( - "ip_adapter_image" not in parameters - ), "`ip_adapter_image` argument must be removed from the `__call__` method" + assert "ip_adapter_image" not in parameters, ( + "`ip_adapter_image` argument must be removed from the `__call__` method" + ) def _get_dummy_image_embeds(self, cross_attention_dim: int = 32): return torch.randn((1, 1, cross_attention_dim), device=torch_device) @@ -101,7 +89,7 @@ def _modify_inputs_for_ip_adapter_test(self, inputs: Dict[str, Any]): if "image" in parameters and "strength" in parameters: inputs["num_inference_steps"] = 4 - inputs["output_type"] = "np" + inputs["output_type"] = "pt" return inputs def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=None): @@ -154,12 +142,12 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N if expected_pipe_slice is not None: output_with_adapter_scale = output_with_adapter_scale[0, -3:, -3:, -1].flatten() - max_diff_without_adapter_scale = np.abs(output_without_adapter_scale - output_without_adapter).max() - max_diff_with_adapter_scale = np.abs(output_with_adapter_scale - output_without_adapter).max() + max_diff_without_adapter_scale = torch.abs(output_without_adapter_scale - output_without_adapter).max() + max_diff_with_adapter_scale = torch.abs(output_with_adapter_scale - output_without_adapter).max() - assert ( - max_diff_without_adapter_scale < expected_max_diff - ), "Output without ip-adapter must be same as normal inference" + assert max_diff_without_adapter_scale < expected_max_diff, ( + "Output without ip-adapter must be same as normal inference" + ) assert max_diff_with_adapter_scale > 1e-2, "Output with ip-adapter must be different from normal inference" # 2. 
Multi IP-Adapter test cases @@ -185,16 +173,16 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N if expected_pipe_slice is not None: output_with_multi_adapter_scale = output_with_multi_adapter_scale[0, -3:, -3:, -1].flatten() - max_diff_without_multi_adapter_scale = np.abs( + max_diff_without_multi_adapter_scale = torch.abs( output_without_multi_adapter_scale - output_without_adapter ).max() - max_diff_with_multi_adapter_scale = np.abs(output_with_multi_adapter_scale - output_without_adapter).max() - assert ( - max_diff_without_multi_adapter_scale < expected_max_diff - ), "Output without multi-ip-adapter must be same as normal inference" - assert ( - max_diff_with_multi_adapter_scale > 1e-2 - ), "Output with multi-ip-adapter scale must be different from normal inference" + max_diff_with_multi_adapter_scale = torch.abs(output_with_multi_adapter_scale - output_without_adapter).max() + assert max_diff_without_multi_adapter_scale < expected_max_diff, ( + "Output without multi-ip-adapter must be same as normal inference" + ) + assert max_diff_with_multi_adapter_scale > 1e-2, ( + "Output with multi-ip-adapter scale must be different from normal inference" + ) class SDXLModularControlNetTesterMixin: @@ -207,9 +195,9 @@ def test_pipeline_inputs(self): parameters = blocks.input_names assert "control_image" in parameters, "`control_image` argument must be supported by the `__call__` method" - assert ( - "controlnet_conditioning_scale" in parameters - ), "`controlnet_conditioning_scale` argument must be supported by the `__call__` method" + assert "controlnet_conditioning_scale" in parameters, ( + "`controlnet_conditioning_scale` argument must be supported by the `__call__` method" + ) def _modify_inputs_for_controlnet_test(self, inputs: Dict[str, Any]): controlnet_embedder_scale_factor = 2 @@ -252,12 +240,14 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N output_with_controlnet_scale = pipe(**inputs, output="images") output_with_controlnet_scale = output_with_controlnet_scale[0, -3:, -3:, -1].flatten() - max_diff_without_controlnet_scale = np.abs(output_without_controlnet_scale - output_without_controlnet).max() - max_diff_with_controlnet_scale = np.abs(output_with_controlnet_scale - output_without_controlnet).max() + max_diff_without_controlnet_scale = torch.abs( + output_without_controlnet_scale - output_without_controlnet + ).max() + max_diff_with_controlnet_scale = torch.abs(output_with_controlnet_scale - output_without_controlnet).max() - assert ( - max_diff_without_controlnet_scale < expected_max_diff - ), "Output without controlnet must be same as normal inference" + assert max_diff_without_controlnet_scale < expected_max_diff, ( + "Output without controlnet must be same as normal inference" + ) assert max_diff_with_controlnet_scale > 1e-2, "Output with controlnet must be different from normal inference" def test_controlnet_cfg(self): @@ -279,7 +269,7 @@ def test_controlnet_cfg(self): out_cfg = pipe(**inputs, output="images") assert out_cfg.shape == out_no_cfg.shape - max_diff = np.abs(out_cfg - out_no_cfg).max() + max_diff = torch.abs(out_cfg - out_no_cfg).max() assert max_diff > 1e-2, "Output with CFG must be different from normal inference" @@ -329,21 +319,35 @@ class TestSDXLModularPipelineFast( ] ) batch_params = frozenset(["prompt", "negative_prompt"]) + expected_image_output_shape = (1, 3, 64, 64) + + def get_dummy_inputs(self, seed=0): + generator = self.get_generator(seed) + inputs = { + "prompt": "A painting of a 
squirrel eating a burger",
+            "generator": generator,
+            "num_inference_steps": 2,
+            "output_type": "pt",
+        }
+        return inputs
 
     def test_stable_diffusion_xl_euler(self):
         self._test_stable_diffusion_xl_euler(
-            expected_image_shape=(1, 64, 64, 3),
-            expected_slice=[
-                0.5966781,
-                0.62939394,
-                0.48465094,
-                0.51573336,
-                0.57593524,
-                0.47035995,
-                0.53410417,
-                0.51436996,
-                0.47313565,
-            ],
+            expected_image_shape=self.expected_image_output_shape,
+            expected_slice=torch.tensor(
+                [
+                    0.5966781,
+                    0.62939394,
+                    0.48465094,
+                    0.51573336,
+                    0.57593524,
+                    0.47035995,
+                    0.53410417,
+                    0.51436996,
+                    0.47313565,
+                ],
+                device=torch_device,
+            ),
             expected_max_diff=1e-2,
         )
 
@@ -352,11 +356,11 @@ def test_inference_batch_single_identical(self):
 
 
 class TestSDXLImg2ImgModularPipelineFast(
-    ModularPipelineTesterMixin,
     SDXLModularTesterMixin,
     SDXLModularIPAdapterTesterMixin,
     SDXLModularControlNetTesterMixin,
     SDXLModularGuiderTesterMixin,
+    ModularPipelineTesterMixin,
 ):
     """Test cases for Stable Diffusion XL image-to-image modular pipeline fast tests."""
 
@@ -374,30 +378,42 @@ class TestSDXLImg2ImgModularPipelineFast(
     )
     batch_params = frozenset(["prompt", "negative_prompt", "image"])
+    expected_image_output_shape = (1, 3, 64, 64)
 
-    def get_dummy_inputs(self, device, seed=0):
-        inputs = super().get_dummy_inputs(device, seed)
-        image = floats_tensor((1, 3, 64, 64), rng=random.Random(seed)).to(device)
-        image = image / 2 + 0.5
-        inputs["image"] = image
-        inputs["strength"] = 0.8
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "generator": generator,
+            "num_inference_steps": 4,
+            "output_type": "pt",
+        }
+        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(torch_device)
+        image = image.cpu().permute(0, 2, 3, 1)[0]
+        init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64))
+
+        inputs["image"] = init_image
+        inputs["strength"] = 0.5
         return inputs
 
     def test_stable_diffusion_xl_euler(self):
         self._test_stable_diffusion_xl_euler(
-            expected_image_shape=(1, 64, 64, 3),
-            expected_slice=[
-                0.56943184,
-                0.4702148,
-                0.48048905,
-                0.6235963,
-                0.551138,
-                0.49629188,
-                0.60031277,
-                0.5688907,
-                0.43996853,
-            ],
+            expected_image_shape=self.expected_image_output_shape,
+            expected_slice=torch.tensor(
+                [
+                    0.56943184,
+                    0.4702148,
+                    0.48048905,
+                    0.6235963,
+                    0.551138,
+                    0.49629188,
+                    0.60031277,
+                    0.5688907,
+                    0.43996853,
+                ],
+                device=torch_device,
+            ),
             expected_max_diff=1e-2,
         )
 
@@ -406,11 +422,11 @@ def test_inference_batch_single_identical(self):
 
 
-class SDXLInpaintingModularPipelineFastTests(
-    ModularPipelineTesterMixin,
+class TestSDXLInpaintingModularPipelineFast(
     SDXLModularTesterMixin,
     SDXLModularIPAdapterTesterMixin,
     SDXLModularControlNetTesterMixin,
     SDXLModularGuiderTesterMixin,
+    ModularPipelineTesterMixin,
 ):
     """Test cases for Stable Diffusion XL inpainting modular pipeline fast tests."""
 
@@ -429,12 +445,20 @@ class SDXLInpaintingModularPipelineFastTests(
     )
     batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"])
+    expected_image_output_shape = (1, 3, 64, 64)
 
-    def get_dummy_inputs(self, device, seed=0):
-        inputs = super().get_dummy_inputs(device, seed)
-        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device)
+    def get_dummy_inputs(self, seed=0):
+        generator = self.get_generator(seed)
+        inputs = {
+            "prompt": "A painting of a squirrel eating a burger",
+            "generator": generator,
+            "num_inference_steps": 4,
+            "output_type": "pt",
+        }
+        image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(torch_device)
         image = image.cpu().permute(0, 2, 3, 1)[0]
         init_image = 
Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + # create mask image[8:, 8:, :] = 255 mask_image = Image.fromarray(np.uint8(image)).convert("L").resize((64, 64)) @@ -447,18 +471,21 @@ def get_dummy_inputs(self, device, seed=0): def test_stable_diffusion_xl_euler(self): self._test_stable_diffusion_xl_euler( - expected_image_shape=(1, 64, 64, 3), - expected_slice=[ - 0.40872607, - 0.38842705, - 0.34893104, - 0.47837183, - 0.43792963, - 0.5332134, - 0.3716843, - 0.47274873, - 0.45000193, - ], + expected_image_shape=self.expected_image_output_shape, + expected_slice=torch.tensor( + [ + 0.40872607, + 0.38842705, + 0.34893104, + 0.47837183, + 0.43792963, + 0.5332134, + 0.3716843, + 0.47274873, + 0.45000193, + ], + device=torch_device, + ), expected_max_diff=1e-2, ) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index 2e7481a8c8bc..1325e5c1de3c 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -71,12 +71,6 @@ def pipeline_blocks_class(self) -> Union[Callable, ModularPipelineBlocks]: "See existing pipeline tests for reference." ) - def get_pipeline(self): - raise NotImplementedError( - "You need to implement `get_pipeline(self)` in the child test class. " - "See existing pipeline tests for reference." - ) - def get_dummy_inputs(self, seed=0): raise NotImplementedError( "You need to implement `get_dummy_inputs(self, device, seed)` in the child test class. " @@ -126,7 +120,7 @@ def teardown_method(self): def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) - pipeline.load_components(self.repo, torch_dtype=torch_dtype) + pipeline.load_components(torch_dtype=torch_dtype) return pipeline def test_pipeline_call_signature(self): @@ -136,9 +130,9 @@ def test_pipeline_call_signature(self): def _check_for_parameters(parameters, expected_parameters, param_type): remaining_parameters = {param for param in parameters if param not in expected_parameters} - assert ( - len(remaining_parameters) == 0 - ), f"Required {param_type} parameters not present: {remaining_parameters}" + assert len(remaining_parameters) == 0, ( + f"Required {param_type} parameters not present: {remaining_parameters}" + ) _check_for_parameters(self.params, input_parameters, "input") _check_for_parameters(self.optional_params, optional_parameters, "optional") @@ -148,7 +142,7 @@ def test_inference_batch_consistent(self, batch_sizes=[2], batch_generator=True) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() inputs["generator"] = self.get_generator(0) logger = logging.get_logger(pipe.__module__) @@ -188,7 +182,7 @@ def test_inference_batch_single_identical( pipe = self.get_pipeline() pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() # Reset generator in case it is has been used in self.get_dummy_inputs inputs["generator"] = self.get_generator(0) @@ -218,7 +212,7 @@ def test_inference_batch_single_identical( assert output_batch.shape[0] == batch_size - max_diff = np.abs(to_np(output_batch[0]) - to_np(output[0])).max() + max_diff = torch.abs(output_batch[0] - output[0]).max() assert max_diff < expected_max_diff, "Batch inference results different 
from single inference results"
 
     @require_accelerator
     def test_float16_inference(self, expected_max_diff=5e-2):
         pipe = self.get_pipeline()
@@ -231,13 +225,13 @@ def test_float16_inference(self, expected_max_diff=5e-2):
         pipe_fp16.to(torch_device, torch.float16)
         pipe_fp16.set_progress_bar_config(disable=None)
 
-        inputs = self.get_dummy_inputs(torch_device)
+        inputs = self.get_dummy_inputs()
         # Reset generator in case it is used inside dummy inputs
         if "generator" in inputs:
             inputs["generator"] = self.get_generator(0)
 
         output = pipe(**inputs, output="images")
 
-        fp16_inputs = self.get_dummy_inputs(torch_device)
+        fp16_inputs = self.get_dummy_inputs()
         # Reset generator in case it is used inside dummy inputs
         if "generator" in fp16_inputs:
             fp16_inputs["generator"] = self.get_generator(0)
@@ -265,16 +259,16 @@ def test_to_device(self):
         model_devices = [
             component.device.type for component in pipe.components.values() if hasattr(component, "device")
         ]
-        assert all(
-            device == torch_device for device in model_devices
-        ), "All pipeline components are not on accelerator device"
+        assert all(device == torch_device for device in model_devices), (
+            "Not all pipeline components are on accelerator device"
+        )
 
     def test_inference_is_not_nan_cpu(self):
         pipe = self.get_pipeline()
         pipe.set_progress_bar_config(disable=None)
         pipe.to("cpu")
 
-        output = pipe(**self.get_dummy_inputs("cpu"), output="images")
+        output = pipe(**self.get_dummy_inputs(), output="images")
         assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN"
 
     @require_accelerator
     def test_inference_is_not_nan(self):
         pipe = self.get_pipeline()
         pipe.set_progress_bar_config(disable=None)
         pipe.to(torch_device)
 
-        output = pipe(**self.get_dummy_inputs(torch_device), output="images")
+        output = pipe(**self.get_dummy_inputs(), output="images")
         assert torch.isnan(output).sum() == 0, "Accelerator Inference returns NaN"
 
     def test_num_images_per_prompt(self):
         pipe = self.get_pipeline()
@@ -300,7 +294,7 @@ def test_num_images_per_prompt(self):
 
         for batch_size in batch_sizes:
             for num_images_per_prompt in num_images_per_prompts:
-                inputs = self.get_dummy_inputs(torch_device)
+                inputs = self.get_dummy_inputs()
 
                 for key in inputs.keys():
                     if key in self.batch_params:
@@ -320,7 +314,7 @@ def test_components_auto_cpu_offload_inference_consistent(self):
 
         image_slices = []
         for pipe in [base_pipe, offload_pipe]:
-            inputs = self.get_dummy_inputs(torch_device)
+            inputs = self.get_dummy_inputs()
             image = pipe(**inputs, output="images")
 
             image_slices.append(image[0, -3:, -3:, -1].flatten())
@@ -342,7 +336,7 @@ def test_save_from_pretrained(self):
 
         image_slices = []
         for pipe in pipes:
-            inputs = self.get_dummy_inputs(torch_device)
+            inputs = self.get_dummy_inputs()
             image = pipe(**inputs, output="images")
 
             image_slices.append(image[0, -3:, -3:, -1].flatten())