Commit 811d1c6
1 Parent(s): 7956ca3

fix more tests

Files changed:
- 1 +26 -0
- all_branches.txt +0 -40
- check_for_branches.py +1 -1
- collect_env.py +609 -0
- init_image.png +0 -0
- mask_image.png +0 -0
- model_ids.txt +0 -0
- new_scheduler.py +22 -0
- prompt_weight.py +35 -0
- run_bug_conv.py +63 -0
- run_local_fuse_xl.py +38 -0
- run_local_xl.py +4 -5
- run_lora.py +43 -0
- run_wuerst.py +37 -0
- run_xl_lora.py +4 -1
- sd_xl_inpaint.py +76 -0
- train_unet.py +24 -0
1
ADDED
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+from diffusers import UNet2DConditionModel
+import torch
+
+unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", variant="fp16", torch_dtype=torch.float16)
+unet.train()
+unet.enable_gradient_checkpointing()
+unet = unet.to("cuda:1")
+
+batch_size = 8
+
+sample = torch.randn((1, 4, 128, 128)).half().to(unet.device).repeat(batch_size, 1, 1, 1)
+time_ids = (torch.arange(6) / 6)[None, :].half().to(unet.device).repeat(batch_size, 1)
+encoder_hidden_states = torch.randn((1, 77, 2048)).half().to(unet.device).repeat(batch_size, 1, 1)
+text_embeds = torch.randn((1, 1280)).half().to(unet.device).repeat(batch_size, 1)
+
+out = unet(sample, 1.0, added_cond_kwargs={"time_ids": time_ids, "text_embeds": text_embeds}, encoder_hidden_states=encoder_hidden_states).sample
+
+loss = ((out - sample) ** 2).mean()
+loss.backward()
+
+print(torch.cuda.max_memory_allocated(device=unet.device))
+
+
+# no gradient checkpointing: 12,276,695,552
+# curr gradient checkpointing: 10,862,276,096
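Note: the two trailing comments record peak allocated bytes for batch_size = 8 with and without gradient checkpointing. A minimal sketch (not part of the commit) of reproducing both numbers in one process, reusing the `unet` and inputs above; `measure` is a hypothetical helper, while reset_peak_memory_stats and disable_gradient_checkpointing are real PyTorch/diffusers APIs:

    # Hypothetical helper, not part of the commit.
    def measure(use_checkpointing):
        unet.zero_grad(set_to_none=True)  # free gradients from the previous run
        torch.cuda.reset_peak_memory_stats(device=unet.device)
        if use_checkpointing:
            unet.enable_gradient_checkpointing()
        else:
            unet.disable_gradient_checkpointing()
        out = unet(sample, 1.0, added_cond_kwargs={"time_ids": time_ids, "text_embeds": text_embeds}, encoder_hidden_states=encoder_hidden_states).sample
        ((out - sample) ** 2).mean().backward()
        return torch.cuda.max_memory_allocated(device=unet.device)

    print(measure(False), measure(True))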
all_branches.txt
CHANGED
@@ -1,42 +1,2 @@
-CompVis/stable-diffusion-v1-3
-CompVis/stable-diffusion-v1-1
-CompVis/stable-diffusion-v1-2
 CompVis/stable-diffusion-v1-4
-hakurei/waifu-diffusion
-rinna/japanese-stable-diffusion
-CompVis/stable-diffusion-v1-5
-runwayml/stable-diffusion-inpainting
-fusing/sd-inpaint-temp
 runwayml/stable-diffusion-v1-5
-ckpt/sd15
-aarondotwork/sd-pokemon-diffusers
-technillogue/waifu-diffusion
-DGSpitzer/Cyberpunk-Anime-Diffusion
-microsoft/vq-diffusion-ithq
-fusing/rdm
-CompVis/ldm-super-resolution-4x-openimages
-BAAI/AltDiffusion
-fusing/test
-stabilityai/stable-diffusion-2
-stabilityai/stable-diffusion-2-base
-stabilityai/stable-diffusion-2-depth
-stabilityai/stable-diffusion-2-inpainting
-stabilityai/stable-diffusion-x4-upscaler
-jplumail/matthieu-v1-pipe
-stabilityai/stable-diffusion-2-1
-stabilityai/stable-diffusion-2-1-base
-jplumail/matthieu-v2-pipe
-timbrooks/instruct-pix2pix
-ruiruin/counmargemodel
-Nacholmo/AbyssOrangeMix2-hard-vae-swapped
-Nacholmo/Counterfeit-V2.5-vae-swapped
-Nacholmo/VOXO-v0-vtuber-diffusers
-p1atdev/pvc-v3
-Nacholmo/meinamixv7-diffusers
-gligen/diffusers-generation-text-box
-gligen/diffusers-inpainting-text-box
-zhg/deliberate
-philz1337/realism
-viktfb/patterngenai
-viktfb/patterngen-v1
-viktfb/style2.0
check_for_branches.py
CHANGED
@@ -26,7 +26,7 @@ if __name__ == "__main__":
     api = HfApi()
     branches = main(api, model_id)
 
-    if "
+    if "non-ema" in branches:
         print(model_id)
     #
     # if len(branches) > 0:
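Note: `main(api, model_id)` is defined outside this hunk. A minimal sketch of a compatible implementation, assuming it is meant to return branch names; `HfApi.list_repo_refs` is a real huggingface_hub API:

    # Sketch only; the actual main() is not shown in this diff.
    def main(api, model_id):
        refs = api.list_repo_refs(model_id)
        return [branch.name for branch in refs.branches]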
collect_env.py
ADDED
@@ -0,0 +1,609 @@
+
+# Unlike the rest of the PyTorch this file must be python2 compliant.
+# This script outputs relevant system environment info
+# Run it with `python collect_env.py`.
+import datetime
+import locale
+import re
+import subprocess
+import sys
+import os
+from collections import namedtuple
+
+
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except (ImportError, NameError, AttributeError, OSError):
+    TORCH_AVAILABLE = False
+
+# System Environment Information
+SystemEnv = namedtuple('SystemEnv', [
+    'torch_version',
+    'is_debug_build',
+    'cuda_compiled_version',
+    'gcc_version',
+    'clang_version',
+    'cmake_version',
+    'os',
+    'libc_version',
+    'python_version',
+    'python_platform',
+    'is_cuda_available',
+    'cuda_runtime_version',
+    'cuda_module_loading',
+    'nvidia_driver_version',
+    'nvidia_gpu_models',
+    'cudnn_version',
+    'pip_version',  # 'pip' or 'pip3'
+    'pip_packages',
+    'conda_packages',
+    'hip_compiled_version',
+    'hip_runtime_version',
+    'miopen_runtime_version',
+    'caching_allocator_config',
+    'is_xnnpack_available',
+    'cpu_info',
+])
+
+
+def run(command):
+    """Returns (return-code, stdout, stderr)"""
+    shell = True if type(command) is str else False
+    p = subprocess.Popen(command, stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE, shell=shell)
+    raw_output, raw_err = p.communicate()
+    rc = p.returncode
+    if get_platform() == 'win32':
+        enc = 'oem'
+    else:
+        enc = locale.getpreferredencoding()
+    output = raw_output.decode(enc)
+    err = raw_err.decode(enc)
+    return rc, output.strip(), err.strip()
+
+
+def run_and_read_all(run_lambda, command):
+    """Runs command using run_lambda; reads and returns entire output if rc is 0"""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    return out
+
+
+def run_and_parse_first_match(run_lambda, command, regex):
+    """Runs command using run_lambda, returns the first regex match if it exists"""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    match = re.search(regex, out)
+    if match is None:
+        return None
+    return match.group(1)
+
+def run_and_return_first_line(run_lambda, command):
+    """Runs command using run_lambda and returns first line if output is not empty"""
+    rc, out, _ = run_lambda(command)
+    if rc != 0:
+        return None
+    return out.split('\n')[0]
+
+
+def get_conda_packages(run_lambda):
+    conda = os.environ.get('CONDA_EXE', 'conda')
+    out = run_and_read_all(run_lambda, "{} list".format(conda))
+    if out is None:
+        return out
+
+    return "\n".join(
+        line
+        for line in out.splitlines()
+        if not line.startswith("#")
+        and any(
+            name in line
+            for name in {
+                "torch",
+                "numpy",
+                "cudatoolkit",
+                "soumith",
+                "mkl",
+                "magma",
+                "triton",
+            }
+        )
+    )
+
+def get_gcc_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
+
+def get_clang_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)')
+
+
+def get_cmake_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)')
+
+
+def get_nvidia_driver_version(run_lambda):
+    if get_platform() == 'darwin':
+        cmd = 'kextstat | grep -i cuda'
+        return run_and_parse_first_match(run_lambda, cmd,
+                                         r'com[.]nvidia[.]CUDA [(](.*?)[)]')
+    smi = get_nvidia_smi()
+    return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) ')
+
+
+def get_gpu_info(run_lambda):
+    if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None):
+        if TORCH_AVAILABLE and torch.cuda.is_available():
+            return torch.cuda.get_device_name(None)
+        return None
+    smi = get_nvidia_smi()
+    uuid_regex = re.compile(r' \(UUID: .+?\)')
+    rc, out, _ = run_lambda(smi + ' -L')
+    if rc != 0:
+        return None
+    # Anonymize GPUs by removing their UUID
+    return re.sub(uuid_regex, '', out)
+
+
+def get_running_cuda_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)')
+
+
+def get_cudnn_version(run_lambda):
+    """This will return a list of libcudnn.so; it's hard to tell which one is being used"""
+    if get_platform() == 'win32':
+        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
+        cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%")
+        where_cmd = os.path.join(system_root, 'System32', 'where')
+        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
+    elif get_platform() == 'darwin':
+        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
+        # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
+        # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
+        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
+        cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*'
+    else:
+        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
+    rc, out, _ = run_lambda(cudnn_cmd)
+    # find will return 1 if there are permission errors or if not found
+    if len(out) == 0 or (rc != 1 and rc != 0):
+        l = os.environ.get('CUDNN_LIBRARY')
+        if l is not None and os.path.isfile(l):
+            return os.path.realpath(l)
+        return None
+    files_set = set()
+    for fn in out.split('\n'):
+        fn = os.path.realpath(fn)  # eliminate symbolic links
+        if os.path.isfile(fn):
+            files_set.add(fn)
+    if not files_set:
+        return None
+    # Alphabetize the result because the order is non-deterministic otherwise
+    files = sorted(files_set)
+    if len(files) == 1:
+        return files[0]
+    result = '\n'.join(files)
+    return 'Probably one of the following:\n{}'.format(result)
+
+
+def get_nvidia_smi():
+    # Note: nvidia-smi is currently available only on Windows and Linux
+    smi = 'nvidia-smi'
+    if get_platform() == 'win32':
+        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
+        program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files')
+        legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi)
+        new_path = os.path.join(system_root, 'System32', smi)
+        smis = [new_path, legacy_path]
+        for candidate_smi in smis:
+            if os.path.exists(candidate_smi):
+                smi = '"{}"'.format(candidate_smi)
+                break
+    return smi
+
+
+# example outputs of CPU infos
+# * linux
+# Architecture: x86_64
+# CPU op-mode(s): 32-bit, 64-bit
+# Address sizes: 46 bits physical, 48 bits virtual
+# Byte Order: Little Endian
+# CPU(s): 128
+# On-line CPU(s) list: 0-127
+# Vendor ID: GenuineIntel
+# Model name: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+# CPU family: 6
+# Model: 106
+# Thread(s) per core: 2
+# Core(s) per socket: 32
+# Socket(s): 2
+# Stepping: 6
+# BogoMIPS: 5799.78
+# Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
+# sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
+# xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
+# pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
+# hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
+# fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
+# avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
+# xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
+# avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
+# Virtualization features:
+# Hypervisor vendor: KVM
+# Virtualization type: full
+# Caches (sum of all):
+# L1d: 3 MiB (64 instances)
+# L1i: 2 MiB (64 instances)
+# L2: 80 MiB (64 instances)
+# L3: 108 MiB (2 instances)
+# NUMA:
+# NUMA node(s): 2
+# NUMA node0 CPU(s): 0-31,64-95
+# NUMA node1 CPU(s): 32-63,96-127
+# Vulnerabilities:
+# Itlb multihit: Not affected
+# L1tf: Not affected
+# Mds: Not affected
+# Meltdown: Not affected
+# Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
+# Retbleed: Not affected
+# Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp
+# Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization
+# Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
+# Srbds: Not affected
+# Tsx async abort: Not affected
+# * win32
+# Architecture=9
+# CurrentClockSpeed=2900
+# DeviceID=CPU0
+# Family=179
+# L2CacheSize=40960
+# L2CacheSpeed=
+# Manufacturer=GenuineIntel
+# MaxClockSpeed=2900
+# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+# ProcessorType=3
+# Revision=27142
+#
+# Architecture=9
+# CurrentClockSpeed=2900
+# DeviceID=CPU1
+# Family=179
+# L2CacheSize=40960
+# L2CacheSpeed=
+# Manufacturer=GenuineIntel
+# MaxClockSpeed=2900
+# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
+# ProcessorType=3
+# Revision=27142
+
+def get_cpu_info(run_lambda):
+    rc, out, err = 0, '', ''
+    if get_platform() == 'linux':
+        rc, out, err = run_lambda('lscpu')
+    elif get_platform() == 'win32':
+        rc, out, err = run_lambda('wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID,\
+CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE')
+    elif get_platform() == 'darwin':
+        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
+    cpu_info = 'None'
+    if rc == 0:
+        cpu_info = out
+    else:
+        cpu_info = err
+    return cpu_info
+
+
+def get_platform():
+    if sys.platform.startswith('linux'):
+        return 'linux'
+    elif sys.platform.startswith('win32'):
+        return 'win32'
+    elif sys.platform.startswith('cygwin'):
+        return 'cygwin'
+    elif sys.platform.startswith('darwin'):
+        return 'darwin'
+    else:
+        return sys.platform
+
+
+def get_mac_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)')
+
+
+def get_windows_version(run_lambda):
+    system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
+    wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic')
+    findstr_cmd = os.path.join(system_root, 'System32', 'findstr')
+    return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd))
+
+
+def get_lsb_version(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)')
+
+
+def check_release_file(run_lambda):
+    return run_and_parse_first_match(run_lambda, 'cat /etc/*-release',
+                                     r'PRETTY_NAME="(.*)"')
+
+
+def get_os(run_lambda):
+    from platform import machine
+    platform = get_platform()
+
+    if platform == 'win32' or platform == 'cygwin':
+        return get_windows_version(run_lambda)
+
+    if platform == 'darwin':
+        version = get_mac_version(run_lambda)
+        if version is None:
+            return None
+        return 'macOS {} ({})'.format(version, machine())
+
+    if platform == 'linux':
+        # Ubuntu/Debian based
+        desc = get_lsb_version(run_lambda)
+        if desc is not None:
+            return '{} ({})'.format(desc, machine())
+
+        # Try reading /etc/*-release
+        desc = check_release_file(run_lambda)
+        if desc is not None:
+            return '{} ({})'.format(desc, machine())
+
+        return '{} ({})'.format(platform, machine())
+
+    # Unknown platform
+    return platform
+
+
+def get_python_platform():
+    import platform
+    return platform.platform()
+
+
+def get_libc_version():
+    import platform
+    if get_platform() != 'linux':
+        return 'N/A'
+    return '-'.join(platform.libc_ver())
+
+
+def get_pip_packages(run_lambda):
+    """Returns `pip list` output. Note: will also find conda-installed pytorch
+    and numpy packages."""
+    # People generally have `pip` as `pip` or `pip3`
+    # But here it is invoked as `python -mpip`
+    def run_with_pip(pip):
+        out = run_and_read_all(run_lambda, pip + ["list", "--format=freeze"])
+        return "\n".join(
+            line
+            for line in out.splitlines()
+            if any(
+                name in line
+                for name in {
+                    "torch",
+                    "numpy",
+                    "mypy",
+                    "flake8",
+                    "triton",
+                }
+            )
+        )
+
+    pip_version = 'pip3' if sys.version[0] == '3' else 'pip'
+    out = run_with_pip([sys.executable, '-mpip'])
+
+    return pip_version, out
+
+
+def get_cachingallocator_config():
+    ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
+    return ca_config
+
+
+def get_cuda_module_loading_config():
+    if TORCH_AVAILABLE and torch.cuda.is_available():
+        torch.cuda.init()
+        config = os.environ.get('CUDA_MODULE_LOADING', '')
+        return config
+    else:
+        return "N/A"
+
+
+def is_xnnpack_available():
+    if TORCH_AVAILABLE:
+        import torch.backends.xnnpack
+        return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
+    else:
+        return "N/A"
+
+def get_env_info():
+    run_lambda = run
+    pip_version, pip_list_output = get_pip_packages(run_lambda)
+
+    if TORCH_AVAILABLE:
+        version_str = torch.__version__
+        debug_mode_str = str(torch.version.debug)
+        cuda_available_str = str(torch.cuda.is_available())
+        cuda_version_str = torch.version.cuda
+        if not hasattr(torch.version, 'hip') or torch.version.hip is None:  # cuda version
+            hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
+        else:  # HIP version
+            def get_version_or_na(cfg, prefix):
+                _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s]
+                return _lst[0] if _lst else 'N/A'
+
+            cfg = torch._C._show_config().split('\n')
+            hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')
+            miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')
+            cuda_version_str = 'N/A'
+            hip_compiled_version = torch.version.hip
+    else:
+        version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'
+        hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
+
+    sys_version = sys.version.replace("\n", " ")
+
+    return SystemEnv(
+        torch_version=version_str,
+        is_debug_build=debug_mode_str,
+        python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1),
+        python_platform=get_python_platform(),
+        is_cuda_available=cuda_available_str,
+        cuda_compiled_version=cuda_version_str,
+        cuda_runtime_version=get_running_cuda_version(run_lambda),
+        cuda_module_loading=get_cuda_module_loading_config(),
+        nvidia_gpu_models=get_gpu_info(run_lambda),
+        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
+        cudnn_version=get_cudnn_version(run_lambda),
+        hip_compiled_version=hip_compiled_version,
+        hip_runtime_version=hip_runtime_version,
+        miopen_runtime_version=miopen_runtime_version,
+        pip_version=pip_version,
+        pip_packages=pip_list_output,
+        conda_packages=get_conda_packages(run_lambda),
+        os=get_os(run_lambda),
+        libc_version=get_libc_version(),
+        gcc_version=get_gcc_version(run_lambda),
+        clang_version=get_clang_version(run_lambda),
+        cmake_version=get_cmake_version(run_lambda),
+        caching_allocator_config=get_cachingallocator_config(),
+        is_xnnpack_available=is_xnnpack_available(),
+        cpu_info=get_cpu_info(run_lambda),
+    )
+
+env_info_fmt = """
+PyTorch version: {torch_version}
+Is debug build: {is_debug_build}
+CUDA used to build PyTorch: {cuda_compiled_version}
+ROCM used to build PyTorch: {hip_compiled_version}
+
+OS: {os}
+GCC version: {gcc_version}
+Clang version: {clang_version}
+CMake version: {cmake_version}
+Libc version: {libc_version}
+
+Python version: {python_version}
+Python platform: {python_platform}
+Is CUDA available: {is_cuda_available}
+CUDA runtime version: {cuda_runtime_version}
+CUDA_MODULE_LOADING set to: {cuda_module_loading}
+GPU models and configuration: {nvidia_gpu_models}
+Nvidia driver version: {nvidia_driver_version}
+cuDNN version: {cudnn_version}
+HIP runtime version: {hip_runtime_version}
+MIOpen runtime version: {miopen_runtime_version}
+Is XNNPACK available: {is_xnnpack_available}
+
+CPU:
+{cpu_info}
+
+Versions of relevant libraries:
+{pip_packages}
+{conda_packages}
+""".strip()
+
+
+def pretty_str(envinfo):
+    def replace_nones(dct, replacement='Could not collect'):
+        for key in dct.keys():
+            if dct[key] is not None:
+                continue
+            dct[key] = replacement
+        return dct
+
+    def replace_bools(dct, true='Yes', false='No'):
+        for key in dct.keys():
+            if dct[key] is True:
+                dct[key] = true
+            elif dct[key] is False:
+                dct[key] = false
+        return dct
+
+    def prepend(text, tag='[prepend]'):
+        lines = text.split('\n')
+        updated_lines = [tag + line for line in lines]
+        return '\n'.join(updated_lines)
+
+    def replace_if_empty(text, replacement='No relevant packages'):
+        if text is not None and len(text) == 0:
+            return replacement
+        return text
+
+    def maybe_start_on_next_line(string):
+        # If `string` is multiline, prepend a \n to it.
+        if string is not None and len(string.split('\n')) > 1:
+            return '\n{}\n'.format(string)
+        return string
+
+    mutable_dict = envinfo._asdict()
+
+    # If nvidia_gpu_models is multiline, start on the next line
+    mutable_dict['nvidia_gpu_models'] = \
+        maybe_start_on_next_line(envinfo.nvidia_gpu_models)
+
+    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
+    dynamic_cuda_fields = [
+        'cuda_runtime_version',
+        'nvidia_gpu_models',
+        'nvidia_driver_version',
+    ]
+    all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
+    all_dynamic_cuda_fields_missing = all(
+        mutable_dict[field] is None for field in dynamic_cuda_fields)
+    if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing:
+        for field in all_cuda_fields:
+            mutable_dict[field] = 'No CUDA'
+        if envinfo.cuda_compiled_version is None:
+            mutable_dict['cuda_compiled_version'] = 'None'
+
+    # Replace True with Yes, False with No
+    mutable_dict = replace_bools(mutable_dict)
+
+    # Replace all None objects with 'Could not collect'
+    mutable_dict = replace_nones(mutable_dict)
+
+    # If either of these are '', replace with 'No relevant packages'
+    mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages'])
+    mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages'])
+
+    # Tag conda and pip packages with a prefix
+    # If they were previously None, they'll show up as ie '[conda] Could not collect'
+    if mutable_dict['pip_packages']:
+        mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'],
+                                               '[{}] '.format(envinfo.pip_version))
+    if mutable_dict['conda_packages']:
+        mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'],
+                                                 '[conda] ')
+    mutable_dict['cpu_info'] = envinfo.cpu_info
+    return env_info_fmt.format(**mutable_dict)
+
+
+def get_pretty_env_info():
+    return pretty_str(get_env_info())
+
+
+def main():
+    print("Collecting environment information...")
+    output = get_pretty_env_info()
+    print(output)
+
+    if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'):
+        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
+        if sys.platform == "linux" and os.path.exists(minidump_dir):
+            dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
+            latest = max(dumps, key=os.path.getctime)
+            ctime = os.path.getctime(latest)
+            creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S')
+            msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
+                  "if this is related to your bug please include it when you file a report ***"
+            print(msg, file=sys.stderr)
+
+
+
+if __name__ == '__main__':
+    main()
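Note: this is the stock PyTorch environment collector. It can be run directly with `python collect_env.py`, or imported; a minimal usage sketch, assuming the file is on the Python path:

    from collect_env import get_pretty_env_info
    print(get_pretty_env_info())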
init_image.png
ADDED

mask_image.png
ADDED

model_ids.txt
CHANGED
The diff for this file is too large to render. See raw diff.
new_scheduler.py
ADDED
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
+import torch
+
+path = "runwayml/stable-diffusion-v1-5"
+
+run_compile = False  # Set True / False
+use_karras_sigmas = False
+
+pipe = DiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=use_karras_sigmas)
+pipe.unet.to(memory_format=torch.channels_last)
+
+if run_compile:
+    print("Run torch compile")
+    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+
+prompt = "ghibli style, a fantasy landscape with castles"
+
+for _ in range(3):
+    images = pipe(prompt=prompt).images
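Note: `use_karras_sigmas` switches the scheduler to Karras-style sigma spacing. A minimal sketch for checking that the flag changes the schedule, assuming a diffusers version whose EulerAncestralDiscreteScheduler accepts the flag (as the script above passes it):

    sched = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)
    sched.set_timesteps(25)
    print(sched.sigmas[:5])  # spacing differs from the non-Karras default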
prompt_weight.py
ADDED
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+import torch
+import os
+from compel import Compel, ReturnedEmbeddingsType
+from diffusers import DiffusionPipeline
+from huggingface_hub import HfApi
+from pathlib import Path
+
+api = HfApi()
+
+pipeline = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", variant="fp16", use_safetensors=True, torch_dtype=torch.float16).to("cuda")
+
+compel = Compel(tokenizer=[pipeline.tokenizer, pipeline.tokenizer_2], text_encoder=[pipeline.text_encoder, pipeline.text_encoder_2], returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, requires_pooled=[False, True])
+
+# upweight "ball"
+prompt = ["a red cat playing with a (ball)1.5", "a red cat playing with a (ball)0.6"]
+conditioning, pooled = compel(prompt)
+
+
+# generate image
+generator = [torch.Generator().manual_seed(33) for _ in range(len(prompt))]
+images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, generator=generator, num_inference_steps=30).images
+
+for i, image in enumerate(images):
+    file_name = f"bb_1_{i}"
+    path = os.path.join(Path.home(), "images", f"{file_name}.png")
+    image.save(path)
+
+    api.upload_file(
+        path_or_fileobj=path,
+        path_in_repo=path.split("/")[-1],
+        repo_id="patrickvonplaten/images",
+        repo_type="dataset",
+    )
+    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
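Note: Compel's `(word)weight` syntax scales the prompt weighting of `word`; values above 1 upweight it, values below 1 downweight it. A minimal sketch, reusing the `compel` object above, to confirm the weight actually changes the conditioning:

    # Sketch only: both prompts tokenize identically, so the tensors are comparable.
    c_up, _ = compel("a red cat playing with a (ball)1.5")
    c_down, _ = compel("a red cat playing with a (ball)0.6")
    print((c_up - c_down).abs().max())  # nonzero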
run_bug_conv.py
ADDED
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class SuperConv(nn.Conv2d):
+
+    def __init__(self, *args, is_lora=False, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.is_lora = is_lora
+
+    def forward(self, *args, **kwargs):
+        if self.is_lora:
+            return 3 + super().forward(*args, **kwargs)
+        else:
+            return super().forward(*args, **kwargs)
+
+# Define a simple Convolutional Neural Network
+class SimpleCNN(nn.Module):
+    def __init__(self):
+        super(SimpleCNN, self).__init__()
+        self.conv1 = SuperConv(3, 6, 5)  # Assuming input images are RGB, so 3 input channels
+        self.pool = nn.MaxPool2d(2, 2)
+        self.conv2 = SuperConv(6, 16, 5)
+        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+        self.fc2 = nn.Linear(120, 84)
+        self.fc3 = nn.Linear(84, 10)
+
+    def forward(self, x):
+        x = self.pool(F.relu(self.conv1(x)))
+        x = self.pool(F.relu(self.conv2(x)))
+        x = x.view(-1, 16 * 5 * 5)
+        x = F.relu(self.fc1(x))
+        x = F.relu(self.fc2(x))
+        x = self.fc3(x)
+        return x
+
+# Create the network
+net = SimpleCNN()
+
+# Initialize weights with dummy values
+for m in net.modules():
+    if isinstance(m, nn.Conv2d):
+        nn.init.constant_(m.weight, 0.1)
+        nn.init.constant_(m.bias, 0.1)
+    elif isinstance(m, nn.Linear):
+        nn.init.constant_(m.weight, 0.1)
+        nn.init.constant_(m.bias, 0.1)
+
+# Perform inference
+input = torch.randn(1, 3, 32, 32).to("cuda")
+net = net.to("cuda")
+output = net(input)
+
+print(output)
+
+net = torch.compile(net, mode="reduce-overhead", fullgraph=True)
+
+output = net(input)
+
+print(output)
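Note: the `3 +` branch in SuperConv.forward exists to exercise torch.compile with a Conv2d subclass whose forward deviates from the stock one. A minimal equivalence-check sketch (not in the commit), assuming fresh modules; compiled and eager share weights, so outputs should match up to inductor numerics:

    eager_net = SimpleCNN().to("cuda")
    compiled_net = torch.compile(eager_net)
    x = torch.randn(1, 3, 32, 32, device="cuda")
    torch.testing.assert_close(eager_net(x), compiled_net(x))  # tolerances may need loosening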
run_local_fuse_xl.py
ADDED
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+from huggingface_hub import HfApi
+import torch
+from pathlib import Path
+import os
+import time
+
+api = HfApi()
+start_time = time.time()
+
+from diffusers import DiffusionPipeline
+import torch
+
+pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
+pipe.load_lora_weights("stabilityai/stable-diffusion-xl-base-1.0", weight_name="sd_xl_offset_example-lora_1.0.safetensors")
+pipe.unet.fuse_lora()
+
+pipe.to(torch_dtype=torch.float16)
+pipe.to("cuda")
+
+torch.manual_seed(0)
+
+prompt = "beautiful scenery nature glass bottle landscape, , purple galaxy bottle"
+negative_prompt = "text, watermark"
+
+image = pipe(prompt, negative_prompt=negative_prompt, num_inference_steps=25).images[0]
+
+file_name = f"aaa"
+path = os.path.join(Path.home(), "images", "ediffi_sdxl", f"{file_name}.png")
+image.save(path)
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
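Note: `fuse_lora()` folds the LoRA deltas into the base UNet weights, so inference runs without the extra LoRA matmuls. In diffusers versions that ship this API the fusion is reversible:

    pipe.unet.unfuse_lora()  # restores the original, unfused weights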
run_local_xl.py
CHANGED
@@ -19,15 +19,14 @@ start_time = time.time()
 # use_refiner = bool(int(sys.argv[1]))
 use_refiner = True
 use_diffusers = True
-path = "/
-refiner_path = "/
-vae_path = "/
-vae_path = "/home/patrick/sai/sdxl-vae"
+path = "stabilityai/stable-diffusion-xl-base-1.0"
+refiner_path = "stabilityai/stable-diffusion-xl-refiner-1.0"
+vae_path = "stabilityai/sdxl-vae"
 
 vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
 if use_diffusers:
     # pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
-    pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True)
+    pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True, add_watermarker=False)
 print(time.time() - start_time)
 pipe.to("cuda")
 
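Note: the change swaps hard-coded local checkpoint paths for their Hub IDs and adds `add_watermarker=False`, which skips the invisible-watermark post-processing step on generated images. Since `local_files_only=True` is kept, the Hub IDs must already be in the local cache; a sketch of pre-fetching them with huggingface_hub's real snapshot_download API:

    from huggingface_hub import snapshot_download
    snapshot_download("stabilityai/stable-diffusion-xl-base-1.0")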
run_lora.py
ADDED
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+from diffusers import StableDiffusionPipeline, KDPM2DiscreteScheduler, StableDiffusionImg2ImgPipeline, HeunDiscreteScheduler, KDPM2AncestralDiscreteScheduler, DDIMScheduler, DPMSolverMultistepScheduler
+import time
+import os
+from huggingface_hub import HfApi
+# from compel import Compel
+import torch
+import sys
+from pathlib import Path
+import requests
+from PIL import Image
+from io import BytesIO
+
+path = "runwayml/stable-diffusion-v1-5"
+lora_id = "takuma104/lora-test-text-encoder-lora-target"
+
+api = HfApi()
+start_time = time.time()
+pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
+pipe.load_lora_weights(lora_id)
+pipe = pipe.to("cuda")
+
+prompt = "a red sks dog"
+
+images = pipe(prompt=prompt,
+              num_inference_steps=15,
+              cross_attention_kwargs={"scale": 0.5},
+              generator=torch.manual_seed(0)
+              ).images
+
+
+for i, image in enumerate(images):
+    file_name = f"aa_{i}"
+    path = os.path.join(Path.home(), "images", f"{file_name}.png")
+    image.save(path)
+
+    api.upload_file(
+        path_or_fileobj=path,
+        path_in_repo=path.split("/")[-1],
+        repo_id="patrickvonplaten/images",
+        repo_type="dataset",
+    )
+    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
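Note: `cross_attention_kwargs={"scale": 0.5}` halves the LoRA contribution at inference time. A minimal sweep sketch reusing the pipeline above; scale 0.0 approximates the base model and 1.0 the full LoRA:

    for scale in (0.0, 0.5, 1.0):
        image = pipe(prompt=prompt, num_inference_steps=15, cross_attention_kwargs={"scale": scale}, generator=torch.manual_seed(0)).images[0]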
run_wuerst.py
ADDED
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+import torch
+from diffusers import AutoPipelineForText2Image
+from huggingface_hub import HfApi
+from pathlib import Path
+import os
+
+from PIL import Image
+import numpy as np
+
+api = HfApi()
+
+pipe = AutoPipelineForText2Image.from_pretrained("warp-diffusion/WuerstchenGeneratorPipeline", torch_dtype=torch.float16).to("cuda")
+
+prompt = [
+    "An old destroyed car standing on a cliff in norway, cinematic photography",
+    "Western movie, closeup cinematic photography",
+    "Pink nike shoe commercial, closeup cinematic photography",
+    "Croatia, closeup cinematic photography",
+    "South Tyrol mountains at sunset, closeup cinematic photography",
+]
+
+
+images = pipe(prompt, guidance_scale=8.0, width=1024, height=1024).images
+
+for i, image in enumerate(images):
+    file_name = f"bb_1_{i}"
+    path = os.path.join(Path.home(), "images", f"{file_name}.png")
+    image.save(path)
+
+    api.upload_file(
+        path_or_fileobj=path,
+        path_in_repo=path.split("/")[-1],
+        repo_id="patrickvonplaten/images",
+        repo_type="dataset",
+    )
+    print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
run_xl_lora.py
CHANGED
@@ -8,7 +8,10 @@ import os
 api = HfApi()
 
 pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
-pipe.load_lora_weights("
+pipe.load_lora_weights("stabilityai/stable-diffusion-xl-base-1.0", weight_name="sd_xl_offset_example-lora_1.0.safetensors")
+# pipe.unet.fuse_lora()
+# 7.8 it/s to beat
+#
 pipe.to(torch_dtype=torch.float16)
 pipe.to("cuda")
 
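Note: the `# 7.8 it/s to beat` comment is a throughput target. A rough measurement sketch (not in the commit); wall-clock timing includes non-UNet overhead, so it understates the sampler's it/s:

    import time
    steps = 25
    t0 = time.time()
    _ = pipe(prompt="test", num_inference_steps=steps).images
    print(steps / (time.time() - t0), "it/s (rough)")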
sd_xl_inpaint.py
ADDED
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+from diffusers import AutoPipelineForInpainting, AutoPipelineForImage2Image
+from diffusers.utils import load_image
+import torch
+from pathlib import Path
+import os
+from huggingface_hub import HfApi
+
+torch.backends.cuda.matmul.allow_tf32 = True
+torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+api = HfApi()
+
+pipe = AutoPipelineForInpainting.from_pretrained("runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
+
+pipe = pipe.to(torch_device)
+pipe.enable_xformers_memory_efficient_attention()
+
+img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
+mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
+
+image = load_image(img_url)
+mask_image = load_image(mask_url)
+
+prompt = "dslr photography of an empty bench, high quality"
+generator = torch.Generator(device="cuda").manual_seed(0)
+
+image = pipe(
+    prompt=prompt,
+    image=image,
+    mask_image=mask_image,
+    guidance_scale=8.0,
+    num_inference_steps=20,
+    generator=generator,
+).images[0]
+
+
+image = image.resize((1024, 1024))
+
+pipe = AutoPipelineForInpainting.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
+pipe.to("cuda")
+
+pipe.enable_xformers_memory_efficient_attention()
+
+image = pipe(
+    prompt=prompt,
+    image=image,
+    mask_image=mask_image,
+    guidance_scale=8.0,
+    num_inference_steps=100,
+    strength=0.2,
+    generator=generator,
+).images[0]
+
+pipe = AutoPipelineForImage2Image.from_pipe(pipe)
+pipe.enable_xformers_memory_efficient_attention()
+
+image = pipe(
+    prompt=prompt,
+    image=image,
+    guidance_scale=8.0,
+    num_inference_steps=100,
+    strength=0.2,
+    generator=generator,
+).images[0]
+
+file_name = f"aaa"
+path = os.path.join(Path.home(), "images", "ediffi_sdxl", f"{file_name}.png")
+image.save(path)
+
+api.upload_file(
+    path_or_fileobj=path,
+    path_in_repo=path.split("/")[-1],
+    repo_id="patrickvonplaten/images",
+    repo_type="dataset",
+)
+print(f"https://huggingface.co/datasets/patrickvonplaten/images/blob/main/{file_name}.png")
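Note: the script chains three passes: SD 1.5 inpainting at 512px, an upscale to 1024px, the SDXL refiner run as an inpainter, and a final img2img polish, the last two at strength=0.2. In diffusers, img2img/inpaint runs roughly int(num_inference_steps * strength) denoising steps, so each 100-step strength-0.2 pass performs about int(100 * 0.2) = 20 actual steps, light refinement rather than full re-generation.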
train_unet.py
ADDED
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+from diffusers import UNet2DConditionModel
+import torch
+
+torch.cuda.set_per_process_memory_fraction(0.5, device="cuda:1")
+
+unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet", variant="fp16", torch_dtype=torch.float16)
+unet.train()
+unet.enable_gradient_checkpointing()
+unet = unet.to("cuda:1")
+
+batch_size = 2
+
+sample = torch.randn((1, 4, 128, 128)).half().to(unet.device).repeat(batch_size, 1, 1, 1)
+time_ids = (torch.arange(6) / 6)[None, :].half().to(unet.device).repeat(batch_size, 1)
+encoder_hidden_states = torch.randn((1, 77, 2048)).half().to(unet.device).repeat(batch_size, 1, 1)
+text_embeds = torch.randn((1, 1280)).half().to(unet.device).repeat(batch_size, 1)
+
+out = unet(sample, 1.0, added_cond_kwargs={"time_ids": time_ids, "text_embeds": text_embeds}, encoder_hidden_states=encoder_hidden_states).sample
+
+loss = ((out - sample) ** 2).mean()
+loss.backward()
+
+print(torch.cuda.max_memory_allocated(device=unet.device))
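Note: `torch.cuda.set_per_process_memory_fraction(0.5, device="cuda:1")` caps this process's caching allocator at half of cuda:1's memory, which makes out-of-memory behavior reproducible for the batch_size = 2 run. A one-line sketch of the resulting budget:

    total = torch.cuda.get_device_properties("cuda:1").total_memory
    print("allocator budget ~", int(0.5 * total), "bytes")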