diff --git a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py index 9d70c21aa8cd..a29fd436149d 100644 --- a/tests/modular_pipelines/flux/test_modular_pipeline_flux.py +++ b/tests/modular_pipelines/flux/test_modular_pipeline_flux.py @@ -15,7 +15,6 @@ import random import tempfile -import unittest import numpy as np import PIL @@ -34,21 +33,16 @@ from ..test_modular_pipelines_common import ModularPipelineTesterMixin -class FluxModularTests: +class TestFluxModularPipelineFast(ModularPipelineTesterMixin): pipeline_class = FluxModularPipeline pipeline_blocks_class = FluxAutoBlocks repo = "hf-internal-testing/tiny-flux-modular" - def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): - pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) - pipeline.load_components(torch_dtype=torch_dtype) - return pipeline + params = frozenset(["prompt", "height", "width", "guidance_scale"]) + batch_params = frozenset(["prompt"]) - def get_dummy_inputs(self, device, seed=0): - if str(device).startswith("mps"): - generator = torch.manual_seed(seed) - else: - generator = torch.Generator(device=device).manual_seed(seed) + def get_dummy_inputs(self, seed=0): + generator = self.get_generator(seed) inputs = { "prompt": "A painting of a squirrel eating a burger", "generator": generator, @@ -57,36 +51,47 @@ def get_dummy_inputs(self, device, seed=0): "height": 8, "width": 8, "max_sequence_length": 48, - "output_type": "np", + "output_type": "pt", } return inputs -class FluxModularPipelineFastTests(FluxModularTests, ModularPipelineTesterMixin, unittest.TestCase): - params = frozenset(["prompt", "height", "width", "guidance_scale"]) - batch_params = frozenset(["prompt"]) - +class TestFluxImg2ImgModularPipelineFast(ModularPipelineTesterMixin): + pipeline_class = FluxModularPipeline + pipeline_blocks_class = FluxAutoBlocks + repo = 
"hf-internal-testing/tiny-flux-modular" -class FluxImg2ImgModularPipelineFastTests(FluxModularTests, ModularPipelineTesterMixin, unittest.TestCase): params = frozenset(["prompt", "height", "width", "guidance_scale", "image"]) batch_params = frozenset(["prompt", "image"]) def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): pipeline = super().get_pipeline(components_manager, torch_dtype) + # Override `vae_scale_factor` here as currently, `image_processor` is initialized with # fixed constants instead of # https://github.com/huggingface/diffusers/blob/d54622c2679d700b425ad61abce9b80fc36212c0/src/diffusers/pipelines/flux/pipeline_flux_img2img.py#L230C9-L232C10 pipeline.image_processor = VaeImageProcessor(vae_scale_factor=2) return pipeline - def get_dummy_inputs(self, device, seed=0): - inputs = super().get_dummy_inputs(device, seed) - image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) - image = image / 2 + 0.5 - inputs["image"] = image - inputs["strength"] = 0.8 - inputs["height"] = 8 - inputs["width"] = 8 + def get_dummy_inputs(self, seed=0): + generator = self.get_generator(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 4, + "guidance_scale": 5.0, + "height": 8, + "width": 8, + "max_sequence_length": 48, + "output_type": "pt", + } + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(torch_device) + image = image.cpu().permute(0, 2, 3, 1)[0] + init_image = PIL.Image.fromarray(np.uint8(image)).convert("RGB") + + inputs["image"] = init_image + inputs["strength"] = 0.5 + return inputs def test_save_from_pretrained(self): @@ -96,6 +101,7 @@ def test_save_from_pretrained(self): with tempfile.TemporaryDirectory() as tmpdirname: base_pipe.save_pretrained(tmpdirname) + pipe = ModularPipeline.from_pretrained(tmpdirname).to(torch_device) pipe.load_components(torch_dtype=torch.float32) pipe.to(torch_device) @@ -105,26 +111,62 @@ def 
test_save_from_pretrained(self): image_slices = [] for pipe in pipes: - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() image = pipe(**inputs, output="images") image_slices.append(image[0, -3:, -3:, -1].flatten()) - assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3 -class FluxKontextModularPipelineFastTests(FluxImg2ImgModularPipelineFastTests): +class TestFluxKontextModularPipelineFast(ModularPipelineTesterMixin): pipeline_class = FluxKontextModularPipeline pipeline_blocks_class = FluxKontextAutoBlocks repo = "hf-internal-testing/tiny-flux-kontext-pipe" - def get_dummy_inputs(self, device, seed=0): - inputs = super().get_dummy_inputs(device, seed) + params = frozenset(["prompt", "height", "width", "guidance_scale", "image"]) + batch_params = frozenset(["prompt", "image"]) + + def get_dummy_inputs(self, seed=0): + generator = self.get_generator(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 5.0, + "height": 8, + "width": 8, + "max_sequence_length": 48, + "output_type": "pt", + } image = PIL.Image.new("RGB", (32, 32), 0) - _ = inputs.pop("strength") + inputs["image"] = image - inputs["height"] = 8 - inputs["width"] = 8 - inputs["max_area"] = 8 * 8 + inputs["max_area"] = inputs["height"] * inputs["width"] inputs["_auto_resize"] = False + return inputs + + def test_save_from_pretrained(self): + pipes = [] + base_pipe = self.get_pipeline().to(torch_device) + pipes.append(base_pipe) + + with tempfile.TemporaryDirectory() as tmpdirname: + base_pipe.save_pretrained(tmpdirname) + + pipe = ModularPipeline.from_pretrained(tmpdirname).to(torch_device) + pipe.load_components(torch_dtype=torch.float32) + pipe.to(torch_device) + pipe.image_processor = VaeImageProcessor(vae_scale_factor=2) + + pipes.append(pipe) + + image_slices = [] + for pipe in pipes: + inputs = 
self.get_dummy_inputs() + image = pipe(**inputs, output="images") + + image_slices.append(image[0, -3:, -3:, -1].flatten()) + + assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3 diff --git a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py index 22347aa5589c..ea54b2bdff47 100644 --- a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py +++ b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py @@ -14,7 +14,6 @@ # limitations under the License. import random -import unittest from typing import Any, Dict import numpy as np @@ -32,63 +31,26 @@ enable_full_determinism() -class SDXLModularTests: +class SDXLModularTesterMixin: """ This mixin defines method to create pipeline, base input and base test across all SDXL modular tests. """ - pipeline_class = StableDiffusionXLModularPipeline - pipeline_blocks_class = StableDiffusionXLAutoBlocks - repo = "hf-internal-testing/tiny-sdxl-modular" - params = frozenset( - [ - "prompt", - "height", - "width", - "negative_prompt", - "cross_attention_kwargs", - "image", - "mask_image", - ] - ) - batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) - - def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): - pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) - pipeline.load_components(torch_dtype=torch_dtype) - return pipeline - - def get_dummy_inputs(self, device, seed=0): - if str(device).startswith("mps"): - generator = torch.manual_seed(seed) - else: - generator = torch.Generator(device=device).manual_seed(seed) - inputs = { - "prompt": "A painting of a squirrel eating a burger", - "generator": generator, - "num_inference_steps": 2, - "output_type": "np", - } - return inputs - def _test_stable_diffusion_xl_euler(self, 
expected_image_shape, expected_slice, expected_max_diff=1e-2): - device = "cpu" # ensure determinism for the device-dependent torch.Generator sd_pipe = self.get_pipeline() - sd_pipe = sd_pipe.to(device) + sd_pipe = sd_pipe.to(torch_device) sd_pipe.set_progress_bar_config(disable=None) - inputs = self.get_dummy_inputs(device) + inputs = self.get_dummy_inputs() image = sd_pipe(**inputs, output="images") image_slice = image[0, -3:, -3:, -1] assert image.shape == expected_image_shape - - assert np.abs(image_slice.flatten() - expected_slice).max() < expected_max_diff, ( - "Image Slice does not match expected slice" - ) + max_diff = torch.abs(image_slice.flatten() - expected_slice).max() + assert max_diff < expected_max_diff, f"Image slice does not match expected slice. Max Difference: {max_diff}" -class SDXLModularIPAdapterTests: +class SDXLModularIPAdapterTesterMixin: """ This mixin is designed to test IP Adapter. """ @@ -127,7 +89,7 @@ def _modify_inputs_for_ip_adapter_test(self, inputs: Dict[str, Any]): if "image" in parameters and "strength" in parameters: inputs["num_inference_steps"] = 4 - inputs["output_type"] = "np" + inputs["output_type"] = "pt" return inputs def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=None): @@ -152,7 +114,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N cross_attention_dim = pipe.unet.config.get("cross_attention_dim") # forward pass without ip adapter - inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) if expected_pipe_slice is None: output_without_adapter = pipe(**inputs, output="images") else: @@ -163,7 +125,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N pipe.unet._load_ip_adapter_weights(adapter_state_dict) # forward pass with single ip adapter, but scale=0 which should have no effect - inputs = 
self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] pipe.set_ip_adapter_scale(0.0) @@ -172,7 +134,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N output_without_adapter_scale = output_without_adapter_scale[0, -3:, -3:, -1].flatten() # forward pass with single ip adapter, but with scale of adapter weights - inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] pipe.set_ip_adapter_scale(42.0) @@ -180,8 +142,8 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N if expected_pipe_slice is not None: output_with_adapter_scale = output_with_adapter_scale[0, -3:, -3:, -1].flatten() - max_diff_without_adapter_scale = np.abs(output_without_adapter_scale - output_without_adapter).max() - max_diff_with_adapter_scale = np.abs(output_with_adapter_scale - output_without_adapter).max() + max_diff_without_adapter_scale = torch.abs(output_without_adapter_scale - output_without_adapter).max() + max_diff_with_adapter_scale = torch.abs(output_with_adapter_scale - output_without_adapter).max() assert max_diff_without_adapter_scale < expected_max_diff, ( "Output without ip-adapter must be same as normal inference" @@ -194,7 +156,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N pipe.unet._load_ip_adapter_weights([adapter_state_dict_1, adapter_state_dict_2]) # forward pass with multi ip adapter, but scale=0 which should have no effect - 
inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2 inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2 pipe.set_ip_adapter_scale([0.0, 0.0]) @@ -203,7 +165,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N output_without_multi_adapter_scale = output_without_multi_adapter_scale[0, -3:, -3:, -1].flatten() # forward pass with multi ip adapter, but with scale of adapter weights - inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs()) inputs["ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2 inputs["negative_ip_adapter_embeds"] = [self._get_dummy_image_embeds(cross_attention_dim)] * 2 pipe.set_ip_adapter_scale([42.0, 42.0]) @@ -211,10 +173,10 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N if expected_pipe_slice is not None: output_with_multi_adapter_scale = output_with_multi_adapter_scale[0, -3:, -3:, -1].flatten() - max_diff_without_multi_adapter_scale = np.abs( + max_diff_without_multi_adapter_scale = torch.abs( output_without_multi_adapter_scale - output_without_adapter ).max() - max_diff_with_multi_adapter_scale = np.abs(output_with_multi_adapter_scale - output_without_adapter).max() + max_diff_with_multi_adapter_scale = torch.abs(output_with_multi_adapter_scale - output_without_adapter).max() assert max_diff_without_multi_adapter_scale < expected_max_diff, ( "Output without multi-ip-adapter must be same as normal inference" ) @@ -223,7 +185,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N ) -class SDXLModularControlNetTests: +class SDXLModularControlNetTesterMixin: """ This mixin is 
designed to test ControlNet. """ @@ -262,24 +224,26 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N pipe.set_progress_bar_config(disable=None) # forward pass without controlnet - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() output_without_controlnet = pipe(**inputs, output="images") output_without_controlnet = output_without_controlnet[0, -3:, -3:, -1].flatten() # forward pass with single controlnet, but scale=0 which should have no effect - inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs()) inputs["controlnet_conditioning_scale"] = 0.0 output_without_controlnet_scale = pipe(**inputs, output="images") output_without_controlnet_scale = output_without_controlnet_scale[0, -3:, -3:, -1].flatten() # forward pass with single controlnet, but with scale of adapter weights - inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs()) inputs["controlnet_conditioning_scale"] = 42.0 output_with_controlnet_scale = pipe(**inputs, output="images") output_with_controlnet_scale = output_with_controlnet_scale[0, -3:, -3:, -1].flatten() - max_diff_without_controlnet_scale = np.abs(output_without_controlnet_scale - output_without_controlnet).max() - max_diff_with_controlnet_scale = np.abs(output_with_controlnet_scale - output_without_controlnet).max() + max_diff_without_controlnet_scale = torch.abs( + output_without_controlnet_scale - output_without_controlnet + ).max() + max_diff_with_controlnet_scale = torch.abs(output_with_controlnet_scale - output_without_controlnet).max() assert max_diff_without_controlnet_scale < expected_max_diff, ( "Output without controlnet must be same as normal inference" @@ -295,21 +259,21 @@ def test_controlnet_cfg(self): guider = ClassifierFreeGuidance(guidance_scale=1.0) 
pipe.update_components(guider=guider) - inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs()) out_no_cfg = pipe(**inputs, output="images") # forward pass with CFG applied guider = ClassifierFreeGuidance(guidance_scale=7.5) pipe.update_components(guider=guider) - inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs(torch_device)) + inputs = self._modify_inputs_for_controlnet_test(self.get_dummy_inputs()) out_cfg = pipe(**inputs, output="images") assert out_cfg.shape == out_no_cfg.shape - max_diff = np.abs(out_cfg - out_no_cfg).max() + max_diff = torch.abs(out_cfg - out_no_cfg).max() assert max_diff > 1e-2, "Output with CFG must be different from normal inference" -class SDXLModularGuiderTests: +class SDXLModularGuiderTesterMixin: def test_guider_cfg(self): pipe = self.get_pipeline() pipe = pipe.to(torch_device) @@ -319,13 +283,13 @@ def test_guider_cfg(self): guider = ClassifierFreeGuidance(guidance_scale=1.0) pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() out_no_cfg = pipe(**inputs, output="images") # forward pass with CFG applied guider = ClassifierFreeGuidance(guidance_scale=7.5) pipe.update_components(guider=guider) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() out_cfg = pipe(**inputs, output="images") assert out_cfg.shape == out_no_cfg.shape @@ -333,30 +297,57 @@ def test_guider_cfg(self): assert max_diff > 1e-2, "Output with CFG must be different from normal inference" -class SDXLModularPipelineFastTests( - SDXLModularTests, - SDXLModularIPAdapterTests, - SDXLModularControlNetTests, - SDXLModularGuiderTests, +class TestSDXLModularPipelineFast( + SDXLModularTesterMixin, + SDXLModularIPAdapterTesterMixin, + SDXLModularControlNetTesterMixin, + SDXLModularGuiderTesterMixin, ModularPipelineTesterMixin, - unittest.TestCase, ): """Test 
cases for Stable Diffusion XL modular pipeline fast tests.""" + pipeline_class = StableDiffusionXLModularPipeline + pipeline_blocks_class = StableDiffusionXLAutoBlocks + repo = "hf-internal-testing/tiny-sdxl-modular" + params = frozenset( + [ + "prompt", + "height", + "width", + "negative_prompt", + "cross_attention_kwargs", + ] + ) + batch_params = frozenset(["prompt", "negative_prompt"]) + expected_image_output_shape = (1, 3, 64, 64) + + def get_dummy_inputs(self, seed=0): + generator = self.get_generator(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 2, + "output_type": "pt", + } + return inputs + def test_stable_diffusion_xl_euler(self): self._test_stable_diffusion_xl_euler( - expected_image_shape=(1, 64, 64, 3), - expected_slice=[ - 0.5966781, - 0.62939394, - 0.48465094, - 0.51573336, - 0.57593524, - 0.47035995, - 0.53410417, - 0.51436996, - 0.47313565, - ], + expected_image_shape=self.expected_image_output_shape, + expected_slice=torch.tensor( + [ + 0.5966781, + 0.62939394, + 0.48465094, + 0.51573336, + 0.57593524, + 0.47035995, + 0.53410417, + 0.51436996, + 0.47313565, + ], + device=torch_device, + ), expected_max_diff=1e-2, ) @@ -364,39 +355,65 @@ def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) -class SDXLImg2ImgModularPipelineFastTests( - SDXLModularTests, - SDXLModularIPAdapterTests, - SDXLModularControlNetTests, - SDXLModularGuiderTests, +class TestSDXLImg2ImgModularPipelineFast( + SDXLModularTesterMixin, + SDXLModularIPAdapterTesterMixin, + SDXLModularControlNetTesterMixin, + SDXLModularGuiderTesterMixin, ModularPipelineTesterMixin, - unittest.TestCase, ): """Test cases for Stable Diffusion XL image-to-image modular pipeline fast tests.""" - def get_dummy_inputs(self, device, seed=0): - inputs = super().get_dummy_inputs(device, seed) - image = floats_tensor((1, 3, 64, 64), 
rng=random.Random(seed)).to(device) - image = image / 2 + 0.5 - inputs["image"] = image - inputs["strength"] = 0.8 + pipeline_class = StableDiffusionXLModularPipeline + pipeline_blocks_class = StableDiffusionXLAutoBlocks + repo = "hf-internal-testing/tiny-sdxl-modular" + params = frozenset( + [ + "prompt", + "height", + "width", + "negative_prompt", + "cross_attention_kwargs", + "image", + ] + ) + batch_params = frozenset(["prompt", "negative_prompt", "image"]) + expected_image_output_shape = (1, 3, 64, 64) + + def get_dummy_inputs(self, seed=0): + generator = self.get_generator(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 4, + "output_type": "pt", + } + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(torch_device) + image = image.cpu().permute(0, 2, 3, 1)[0] + init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + + inputs["image"] = init_image + inputs["strength"] = 0.5 return inputs def test_stable_diffusion_xl_euler(self): self._test_stable_diffusion_xl_euler( - expected_image_shape=(1, 64, 64, 3), - expected_slice=[ - 0.56943184, - 0.4702148, - 0.48048905, - 0.6235963, - 0.551138, - 0.49629188, - 0.60031277, - 0.5688907, - 0.43996853, - ], + expected_image_shape=self.expected_image_output_shape, + expected_slice=torch.tensor( + [ + 0.56943184, + 0.4702148, + 0.48048905, + 0.6235963, + 0.551138, + 0.49629188, + 0.60031277, + 0.5688907, + 0.43996853, + ], + device=torch_device, + ), expected_max_diff=1e-2, ) @@ -405,20 +422,43 @@ def test_inference_batch_single_identical(self): -class SDXLInpaintingModularPipelineFastTests( +class TestSDXLInpaintingModularPipelineFast( - SDXLModularTests, - SDXLModularIPAdapterTests, - SDXLModularControlNetTests, - SDXLModularGuiderTests, + SDXLModularTesterMixin, + SDXLModularIPAdapterTesterMixin, + SDXLModularControlNetTesterMixin, + SDXLModularGuiderTesterMixin, ModularPipelineTesterMixin, - unittest.TestCase, ): """Test cases for Stable Diffusion XL 
inpainting modular pipeline fast tests.""" + pipeline_class = StableDiffusionXLModularPipeline + pipeline_blocks_class = StableDiffusionXLAutoBlocks + repo = "hf-internal-testing/tiny-sdxl-modular" + params = frozenset( + [ + "prompt", + "height", + "width", + "negative_prompt", + "cross_attention_kwargs", + "image", + "mask_image", + ] + ) + batch_params = frozenset(["prompt", "negative_prompt", "image", "mask_image"]) + expected_image_output_shape = (1, 3, 64, 64) + - def get_dummy_inputs(self, device, seed=0): - inputs = super().get_dummy_inputs(device, seed) + def get_dummy_inputs(self, seed=0): + generator = self.get_generator(seed) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "generator": generator, + "num_inference_steps": 4, + "output_type": "pt", + } - image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(torch_device) image = image.cpu().permute(0, 2, 3, 1)[0] init_image = Image.fromarray(np.uint8(image)).convert("RGB").resize((64, 64)) + # create mask image[8:, 8:, :] = 255 mask_image = Image.fromarray(np.uint8(image)).convert("L").resize((64, 64)) @@ -431,18 +471,21 @@ def get_dummy_inputs(self, device, seed=0): def test_stable_diffusion_xl_euler(self): self._test_stable_diffusion_xl_euler( - expected_image_shape=(1, 64, 64, 3), - expected_slice=[ - 0.40872607, - 0.38842705, - 0.34893104, - 0.47837183, - 0.43792963, - 0.5332134, - 0.3716843, - 0.47274873, - 0.45000193, - ], + expected_image_shape=self.expected_image_output_shape, + expected_slice=torch.tensor( + [ + 0.40872607, + 0.38842705, + 0.34893104, + 0.47837183, + 0.43792963, + 0.5332134, + 0.3716843, + 0.47274873, + 0.45000193, + ], + device=torch_device, + ), expected_max_diff=1e-2, ) diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py index d309fcf35339..1325e5c1de3c 100644 --- a/tests/modular_pipelines/test_modular_pipelines_common.py +++ b/tests/modular_pipelines/test_modular_pipelines_common.py @@ -1,9 +1,7 @@ import gc 
import tempfile -import unittest from typing import Callable, Union -import numpy as np import torch import diffusers @@ -19,17 +17,9 @@ ) -def to_np(tensor): - if isinstance(tensor, torch.Tensor): - tensor = tensor.detach().cpu().numpy() - - return tensor - - @require_torch class ModularPipelineTesterMixin: """ - This mixin is designed to be used with unittest.TestCase classes. It provides a set of common tests for each modular pipeline, including: - test_pipeline_call_signature: check if the pipeline's __call__ method has all required parameters @@ -57,9 +47,8 @@ class ModularPipelineTesterMixin: ] ) - def get_generator(self, seed): - device = torch_device if torch_device != "mps" else "cpu" - generator = torch.Generator(device).manual_seed(seed) + def get_generator(self, seed=0): + generator = torch.Generator("cpu").manual_seed(seed) return generator @property @@ -82,13 +71,7 @@ def pipeline_blocks_class(self) -> Union[Callable, ModularPipelineBlocks]: "See existing pipeline tests for reference." ) - def get_pipeline(self): - raise NotImplementedError( - "You need to implement `get_pipeline(self)` in the child test class. " - "See existing pipeline tests for reference." - ) - - def get_dummy_inputs(self, device, seed=0): + def get_dummy_inputs(self, seed=0): raise NotImplementedError( - "You need to implement `get_dummy_inputs(self, device, seed)` in the child test class. " + "You need to implement `get_dummy_inputs(self, seed)` in the child test class. " "See existing pipeline tests for reference." 
) - def setUp(self): + def setup_method(self): # clean up the VRAM before each test - super().setUp() torch.compiler.reset() gc.collect() backend_empty_cache(torch_device) - def tearDown(self): + def teardown_method(self): # clean up the VRAM after each test in case of CUDA runtime errors - super().tearDown() torch.compiler.reset() gc.collect() backend_empty_cache(torch_device) + def get_pipeline(self, components_manager=None, torch_dtype=torch.float32): + pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager) + pipeline.load_components(torch_dtype=torch_dtype) + return pipeline + def test_pipeline_call_signature(self): pipe = self.get_pipeline() input_parameters = pipe.blocks.input_names @@ -156,7 +142,7 @@ def test_inference_batch_consistent(self, batch_sizes=[2], batch_generator=True) pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() inputs["generator"] = self.get_generator(0) logger = logging.get_logger(pipe.__module__) @@ -196,7 +182,7 @@ def test_inference_batch_single_identical( pipe = self.get_pipeline() pipe.to(torch_device) pipe.set_progress_bar_config(disable=None) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() # Reset generator in case it is has been used in self.get_dummy_inputs inputs["generator"] = self.get_generator(0) @@ -226,10 +212,9 @@ def test_inference_batch_single_identical( assert output_batch.shape[0] == batch_size - max_diff = np.abs(to_np(output_batch[0]) - to_np(output[0])).max() + max_diff = torch.abs(output_batch[0] - output[0]).max() assert max_diff < expected_max_diff, "Batch inference results different from single inference results" - @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") @require_accelerator def test_float16_inference(self, expected_max_diff=5e-2): pipe = self.get_pipeline() @@ -240,13 +225,13 @@ def 
test_float16_inference(self, expected_max_diff=5e-2): pipe_fp16.to(torch_device, torch.float16) pipe_fp16.set_progress_bar_config(disable=None) - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() # Reset generator in case it is used inside dummy inputs if "generator" in inputs: inputs["generator"] = self.get_generator(0) output = pipe(**inputs, output="images") - fp16_inputs = self.get_dummy_inputs(torch_device) + fp16_inputs = self.get_dummy_inputs() # Reset generator in case it is used inside dummy inputs if "generator" in fp16_inputs: fp16_inputs["generator"] = self.get_generator(0) @@ -283,8 +268,8 @@ def test_inference_is_not_nan_cpu(self): pipe.set_progress_bar_config(disable=None) pipe.to("cpu") - output = pipe(**self.get_dummy_inputs("cpu"), output="images") - assert np.isnan(to_np(output)).sum() == 0, "CPU Inference returns NaN" + output = pipe(**self.get_dummy_inputs(), output="images") + assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN" @require_accelerator def test_inference_is_not_nan(self): @@ -292,8 +277,8 @@ def test_inference_is_not_nan(self): pipe.set_progress_bar_config(disable=None) pipe.to(torch_device) - output = pipe(**self.get_dummy_inputs(torch_device), output="images") - assert np.isnan(to_np(output)).sum() == 0, "Accelerator Inference returns NaN" + output = pipe(**self.get_dummy_inputs(), output="images") + assert torch.isnan(output).sum() == 0, "Accelerator Inference returns NaN" def test_num_images_per_prompt(self): pipe = self.get_pipeline() @@ -309,7 +294,7 @@ def test_num_images_per_prompt(self): for batch_size in batch_sizes: for num_images_per_prompt in num_images_per_prompts: - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() for key in inputs.keys(): if key in self.batch_params: @@ -329,12 +314,12 @@ def test_components_auto_cpu_offload_inference_consistent(self): image_slices = [] for pipe in [base_pipe, offload_pipe]: - inputs = 
self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() image = pipe(**inputs, output="images") image_slices.append(image[0, -3:, -3:, -1].flatten()) - assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3 def test_save_from_pretrained(self): pipes = [] @@ -351,9 +336,9 @@ def test_save_from_pretrained(self): image_slices = [] for pipe in pipes: - inputs = self.get_dummy_inputs(torch_device) + inputs = self.get_dummy_inputs() image = pipe(**inputs, output="images") image_slices.append(image[0, -3:, -3:, -1].flatten()) - assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 + assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3