eykarim committed on Nov 26, 2022

Commit

ea8fb12

1 Parent(s): f8c8561

Upload 21 files

Browse files

Files changed (21) hide show

README.md +70 -0
create_handler.ipynb +275 -0
feature_extractor/preprocessor_config.json +20 -0
handler.py +42 -0
model_index.json +32 -0
requirements.txt +1 -0
safety_checker/config.json +174 -0
safety_checker/pytorch_model.bin +3 -0
sample.jpg +0 -0
scheduler/.ipynb_checkpoints/scheduler_config-checkpoint.json +9 -0
scheduler/scheduler_config.json +9 -0
text_encoder/config.json +24 -0
text_encoder/pytorch_model.bin +3 -0
tokenizer/merges.txt +0 -0
tokenizer/special_tokens_map.json +24 -0
tokenizer/tokenizer_config.json +34 -0
tokenizer/vocab.json +0 -0
unet/config.json +37 -0
unet/diffusion_pytorch_model.bin +3 -0
vae/config.json +29 -0
vae/diffusion_pytorch_model.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,70 @@

+---
+license: creativeml-openrail-m
+tags:
+- stable-diffusion
+- stable-diffusion-diffusers
+- text-to-image
+- endpoints-template
+inference: false
+---
+# Fork of [CompVis/stable-diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4)
+> Stable Diffusion is a latent text-to-image diffusion model capable of generating photo-realistic images given any text input.
+> For more information about how Stable Diffusion functions, please have a look at [🤗's Stable Diffusion with 🧨Diffusers blog](https://huggingface.co/blog/stable_diffusion).
+For more information about the model, license and limitations check the original model card at [CompVis/stable-diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4).
+### License (CreativeML OpenRAIL-M)
+The full license can be found here: https://huggingface.co/spaces/CompVis/stable-diffusion-license
+---
+This repository implements a custom `handler` task for `text-to-image` for 🤗 Inference Endpoints. The code for the customized pipeline is in [handler.py](https://huggingface.co/philschmid/stable-diffusion-v1-4-endpoints/blob/main/handler.py).
+A [notebook](https://huggingface.co/philschmid/stable-diffusion-v1-4-endpoints/blob/main/create_handler.ipynb) that shows how to create the `handler.py` is also included.
+### Expected request payload
+```json
+{
+    "inputs": "A prompt used for image generation"
+}
+```
+Below is an example of how to run a request using Python and `requests`.
+## Run Request
+```python
+import json
+from typing import List
+import requests as r
+import base64
+from PIL import Image
+from io import BytesIO
+# URL that `predict` POSTs the request to; left empty here, fill in before running
+ENDPOINT_URL = ""
+# token sent as the `Authorization: Bearer ...` header; left empty here, fill in before running
+HF_TOKEN = ""
+# helper decoder
+def decode_base64_image(image_string):
+  base64_image = base64.b64decode(image_string)
+  buffer = BytesIO(base64_image)
+  return  Image.open(buffer)
+def predict(prompt:str=None):
+    payload = {"inputs": code_snippet,"parameters": parameters}
+    response = r.post(
+        ENDPOINT_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"}, json={"inputs": prompt}
+    )
+    resp = response.json()
+    return decode_base64_image(resp["image"])
+# request an image for a sample prompt; `prediction` holds whatever `predict` returns (the decoded image)
+prediction = predict(
+    prompt="the first animal on the mars"
+)
+```
+Expected output:
+![sample](sample.jpg)

create_handler.ipynb ADDED Viewed

	@@ -0,0 +1,275 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup & Installation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting requirements.txt\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile requirements.txt\n",
+    "diffusers==0.2.4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -r requirements.txt --upgrade"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Create Custom Handler for Inference Endpoints\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "device(type='cuda')"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+    "device"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if device.type != 'cuda':\n",
+    "    raise ValueError(\"need to run on GPU\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting handler.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%writefile handler.py\n",
+    "from typing import  Dict, List, Any\n",
+    "import torch\n",
+    "from torch import autocast\n",
+    "from diffusers import StableDiffusionPipeline\n",
+    "import base64\n",
+    "from io import BytesIO\n",
+    "\n",
+    "\n",
+    "# set device\n",
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+    "\n",
+    "if device.type != 'cuda':\n",
+    "    raise ValueError(\"need to run on GPU\")\n",
+    "\n",
+    "class EndpointHandler():\n",
+    "    def __init__(self, path=\"\"):\n",
+    "        # load the optimized model\n",
+    "        self.pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)\n",
+    "        self.pipe = self.pipe.to(device)\n",
+    "\n",
+    "\n",
+    "    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:\n",
+    "        \"\"\"\n",
+    "        Args:\n",
+    "            data (:obj:):\n",
+    "                includes the input data and the parameters for the inference.\n",
+    "        Return:\n",
+    "            A :obj:`dict`:. base64 encoded image\n",
+    "        \"\"\"\n",
+    "        inputs = data.pop(\"inputs\", data)\n",
+    "        \n",
+    "        # run inference pipeline\n",
+    "        with autocast(device.type):\n",
+    "            image = self.pipe(inputs, guidance_scale=7.5)[\"sample\"][0]  \n",
+    "            \n",
+    "        # encode image as base 64\n",
+    "        buffered = BytesIO()\n",
+    "        image.save(buffered, format=\"JPEG\")\n",
+    "        img_str = base64.b64encode(buffered.getvalue())\n",
+    "\n",
+    "        # postprocess the prediction\n",
+    "        return {\"image\": img_str.decode()}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "test custom pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'1.11.0+cu113'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "torch.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "ftfy or spacy is not installed using BERT BasicTokenizer instead of ftfy.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from handler import EndpointHandler\n",
+    "\n",
+    "# init handler\n",
+    "my_handler = EndpointHandler(path=\".\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "376de150f16b4b4bb0c3ab8c513de5c0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import base64\n",
+    "from PIL import Image\n",
+    "from io import BytesIO\n",
+    "import json\n",
+    "\n",
+    "# helper decoder\n",
+    "def decode_base64_image(image_string):\n",
+    "  base64_image = base64.b64decode(image_string)\n",
+    "  buffer = BytesIO(base64_image)\n",
+    "  return  Image.open(buffer)\n",
+    "\n",
+    "# prepare sample payload\n",
+    "request = {\"inputs\": \"a high resulotion image of a macbook\"}\n",
+    "\n",
+    "# test the handler\n",
+    "pred = my_handler(request)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "decode_base64_image(pred[\"image\"]).save(\"sample.jpg\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![test](sample.jpg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.13 ('dev': conda)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "f6dd96c16031089903d5a31ec148b80aeb0d39c32affb1a1080393235fbfa2fc"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

feature_extractor/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "crop_size": 224,
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_resize": true,
+  "feature_extractor_type": "CLIPFeatureExtractor",
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "size": 224
+}

handler.py ADDED Viewed

	@@ -0,0 +1,42 @@

+from typing import  Dict, List, Any
+import torch
+from torch import autocast
+from diffusers import StableDiffusionPipeline
+import base64
+from io import BytesIO
+# set device: use CUDA when torch reports it as available, otherwise CPU
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# refuse to import on hosts without CUDA; the handler below loads the
+# pipeline in float16 and moves it onto `device`
+if device.type != 'cuda':
+    raise ValueError("need to run on GPU")
+class EndpointHandler():
+    def __init__(self, path=""):
+        # load the optimized model
+        self.pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
+        self.pipe = self.pipe.to(device)
+    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
+        """
+        Args:
+            data (:obj:):
+                includes the input data and the parameters for the inference.
+        Return:
+            A :obj:`dict`:. base64 encoded image
+        """
+        inputs = data.pop("inputs", data)
+        # run inference pipeline
+        with autocast(device.type):
+            image = self.pipe(inputs, guidance_scale=7.5)["sample"][0]
+        # encode image as base 64
+        buffered = BytesIO()
+        image.save(buffered, format="JPEG")
+        img_str = base64.b64encode(buffered.getvalue())
+        # postprocess the prediction
+        return {"image": img_str.decode()}

model_index.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_class_name": "StableDiffusionPipeline",
+  "_diffusers_version": "0.2.3",
+  "feature_extractor": [
+    "transformers",
+    "CLIPFeatureExtractor"
+  ],
+  "safety_checker": [
+    "stable_diffusion",
+    "StableDiffusionSafetyChecker"
+  ],
+  "scheduler": [
+    "diffusers",
+    "PNDMScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "CLIPTextModel"
+  ],
+  "tokenizer": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "UNet2DConditionModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKL"
+  ]
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ diffusers==0.2.4

safety_checker/config.json ADDED Viewed

	@@ -0,0 +1,174 @@

+{
+  "_name_or_path": "./safety_checker",
+  "architectures": [
+    "StableDiffusionSafetyChecker"
+  ],
+  "initializer_factor": 1.0,
+  "logit_scale_init_value": 2.6592,
+  "model_type": "clip",
+  "projection_dim": 768,
+  "text_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.0,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 77,
+    "min_length": 0,
+    "model_type": "clip_text_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.21.1",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "vocab_size": 49408
+  },
+  "text_config_dict": {
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12
+  },
+  "torch_dtype": "float16",
+  "transformers_version": null,
+  "vision_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.0,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "clip_vision_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 16,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 14,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.21.1",
+    "typical_p": 1.0,
+    "use_bfloat16": false
+  },
+  "vision_config_dict": {
+    "hidden_size": 1024,
+    "intermediate_size": 4096,
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "patch_size": 14
+  }
+}

safety_checker/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d37ca6e57ace94e4c2f03ed0f67b6dc83e1ef1160892074917aa68b28e2afc1
+size 608098599

sample.jpg ADDED Viewed

scheduler/.ipynb_checkpoints/scheduler_config-checkpoint.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_class_name": "PNDMScheduler",
+  "_diffusers_version": "0.2.2",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "num_train_timesteps": 1000,
+  "skip_prk_steps": true
+}

scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_class_name": "PNDMScheduler",
+  "_diffusers_version": "0.2.3",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "num_train_timesteps": 1000,
+  "skip_prk_steps": true
+}

text_encoder/config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_name_or_path": "./text_encoder",
+  "architectures": [
+    "CLIPTextModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 768,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "torch_dtype": "float16",
+  "transformers_version": "4.21.1",
+  "vocab_size": 49408
+}

text_encoder/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88bd85efb0f84e70521633f578715afb2873db4f2615fdfb1f66e99934715865
+size 246184375

tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "do_lower_case": true,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "model_max_length": 77,
+  "name_or_path": "./tokenizer",
+  "pad_token": "<|endoftext|>",
+  "special_tokens_map_file": "./special_tokens_map.json",
+  "tokenizer_class": "CLIPTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

unet/config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.2.3",
+  "_name_or_path": "./unet",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ]
+}

unet/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d98edd280d5e040ee77f5802b8e3be3513de757335d1dedc4e495647e7c2d573
+size 1719312805

vae/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.2.3",
+  "_name_or_path": "./vae",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "out_channels": 3,
+  "sample_size": 512,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

vae/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51c8904bc921e1e6f354b5fa8e99a1c82ead2f0540114de21557b8abfbb24ad0
+size 167399505