wan2-1-fast

Running on Zero

App Files Files Community

cbensimon HF Staff committed 9 days ago

Commit

1017ac0

1 Parent(s): 1e98a9a

Cleanup

Browse files

Files changed (3) hide show

app.py +13 -10
optimization.py +5 -3
optimization_utils.py +18 -18

app.py CHANGED Viewed

@@ -54,24 +54,27 @@ default_prompt_i2v = "make this image come alive, cinematic motion, smooth anima
 default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
-def fit_to_480p(image: Image.Image) -> Image.Image:
     target_aspect = LANDSCAPE_WIDTH / LANDSCAPE_HEIGHT
     width, height = image.size
     in_aspect = width / height
     if in_aspect > target_aspect:
-        new_width = int(height * target_aspect)
         left = (width - new_width) // 2
         image = image.crop((left, 0, left + new_width, height))
     else:
-        new_height = int(width / target_aspect)
         top = (height - new_height) // 2
         image = image.crop((0, top, width, top + new_height))
-    target_width, target_height = (
-        (LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT)
-        if in_aspect > target_aspect else
-        (LANDSCAPE_HEIGHT, LANDSCAPE_WIDTH)
-    )
-    return image.resize((target_width, target_height), Image.LANCZOS)
 def get_duration(
     input_image,
@@ -147,7 +150,7 @@ def generate_video(
     num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
-    resized_image = fit_to_480p(input_image)
     output_frames_list = pipe(
         image=resized_image,

 default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
+def resize_image(image: Image.Image) -> Image.Image:
+    if image.height > image.width:
+        transposed = image.transpose(Image.Transpose.ROTATE_90)
+        resized = resize_image_landscape(transposed)
+        return resized.transpose(Image.Transpose.ROTATE_270)
+    return resize_image_landscape(image)
+def resize_image_landscape(image: Image.Image) -> Image.Image:
     target_aspect = LANDSCAPE_WIDTH / LANDSCAPE_HEIGHT
     width, height = image.size
     in_aspect = width / height
     if in_aspect > target_aspect:
+        new_width = round(height * target_aspect)
         left = (width - new_width) // 2
         image = image.crop((left, 0, left + new_width, height))
     else:
+        new_height = round(width / target_aspect)
         top = (height - new_height) // 2
         image = image.crop((0, top, width, top + new_height))
+    return image.resize((LANDSCAPE_WIDTH, LANDSCAPE_HEIGHT), Image.LANCZOS)
 def get_duration(
     input_image,
     num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+    resized_image = resize_image(input_image)
     output_frames_list = pipe(
         image=resized_image,

optimization.py CHANGED Viewed

@@ -89,10 +89,12 @@ def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kw
         compiled_landscape = aoti_compile(exported_landscape, INDUCTOR_CONFIGS)
         print('compiled_landscape', -(t0 - (t0 := datetime.now())))
-        compiled_portrait = aoti_compile(exported_portrait, INDUCTOR_CONFIGS) # TODO: weights_from=compiled_landscape
-        compiled_portrait.weights.clear()
         print('compiled_portrait', -(t0 - (t0 := datetime.now())))
         return compiled_landscape, compiled_portrait
     compiled_landscape, compiled_portrait = compile_transformer()

         compiled_landscape = aoti_compile(exported_landscape, INDUCTOR_CONFIGS)
         print('compiled_landscape', -(t0 - (t0 := datetime.now())))
+        compiled_portrait = aoti_compile(exported_portrait, INDUCTOR_CONFIGS)
         print('compiled_portrait', -(t0 - (t0 := datetime.now())))
+        # Avoid weights duplication when serializing back to main process
+        compiled_portrait.weights = compiled_landscape.weights
         return compiled_landscape, compiled_portrait
     compiled_landscape, compiled_portrait = compile_transformer()

optimization_utils.py CHANGED Viewed

@@ -10,7 +10,6 @@ from unittest.mock import patch
 import torch
 from torch._inductor.package.package import package_aoti
 from torch.export.pt2_archive._package import AOTICompiledModel
-from torch.export.pt2_archive._package_weights import TensorProperties
 from torch.export.pt2_archive._package_weights import Weights
@@ -21,31 +20,31 @@ INDUCTOR_CONFIGS_OVERRIDES = {
 }
 class ZeroGPUCompiledModel:
-    def __init__(self, archive_file: torch.types.FileLike, weights: Weights, cuda: bool = False):
         self.archive_file = archive_file
         self.weights = weights
-        if cuda:
-            self.weights_to_cuda_()
         self.compiled_model: ContextVar[AOTICompiledModel | None] = ContextVar('compiled_model', default=None)
-    def weights_to_cuda_(self):
-        for name in self.weights:
-            tensor, properties = self.weights.get_weight(name)
-            self.weights[name] = (tensor.to('cuda'), properties)
     def __call__(self, *args, **kwargs):
         if (compiled_model := self.compiled_model.get()) is None:
-            constants_map = {name: value[0] for name, value in self.weights.items()}
             compiled_model = cast(AOTICompiledModel, torch._inductor.aoti_load_package(self.archive_file))
-            compiled_model.load_constants(constants_map, check_full_update=True, user_managed=True)
             self.compiled_model.set(compiled_model)
         return compiled_model(*args, **kwargs)
-    def __reduce__(self):
-        weight_dict: dict[str, tuple[torch.Tensor, TensorProperties]] = {}
-        for name in self.weights:
-            tensor, properties = self.weights.get_weight(name)
-            tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
-            weight_dict[name] = (tensor_.copy_(tensor).detach().share_memory_(), properties)
-        return ZeroGPUCompiledModel, (self.archive_file, Weights(weight_dict), True)
 def aoti_compile(
@@ -61,7 +60,8 @@ def aoti_compile(
     files: list[str | Weights] = [file for file in artifacts if isinstance(file, str)]
     package_aoti(archive_file, files)
     weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
-    return ZeroGPUCompiledModel(archive_file, weights)
 @contextlib.contextmanager

 import torch
 from torch._inductor.package.package import package_aoti
 from torch.export.pt2_archive._package import AOTICompiledModel
 from torch.export.pt2_archive._package_weights import Weights
 }
+class ZeroGPUWeights:
+    def __init__(self, constants_map: dict[str, torch.Tensor], to_cuda: bool = False):
+        if to_cuda:
+            self.constants_map = {name: tensor.to('cuda') for name, tensor in constants_map.items()}
+        else:
+            self.constants_map = constants_map
+    def __reduce__(self):
+        constants_map: dict[str, torch.Tensor] = {}
+        for name, tensor in self.constants_map.items():
+            tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
+            constants_map[name] = tensor_.copy_(tensor).detach().share_memory_()
+        return ZeroGPUWeights, (constants_map, True)
 class ZeroGPUCompiledModel:
+    def __init__(self, archive_file: torch.types.FileLike, weights: ZeroGPUWeights):
         self.archive_file = archive_file
         self.weights = weights
         self.compiled_model: ContextVar[AOTICompiledModel | None] = ContextVar('compiled_model', default=None)
     def __call__(self, *args, **kwargs):
         if (compiled_model := self.compiled_model.get()) is None:
             compiled_model = cast(AOTICompiledModel, torch._inductor.aoti_load_package(self.archive_file))
+            compiled_model.load_constants(self.weights.constants_map, check_full_update=True, user_managed=True)
             self.compiled_model.set(compiled_model)
         return compiled_model(*args, **kwargs)
 def aoti_compile(
     files: list[str | Weights] = [file for file in artifacts if isinstance(file, str)]
     package_aoti(archive_file, files)
     weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
+    zerogpu_weights = ZeroGPUWeights({name: weights.get_weight(name)[0] for name in weights})
+    return ZeroGPUCompiledModel(archive_file, zerogpu_weights)
 @contextlib.contextmanager