Update: Add full model implementation

Browse files

Files changed (3) hide show

Dockerfile +35 -37
diffsketcher_model.py +107 -0
handler.py +62 -24

Dockerfile CHANGED Viewed

@@ -2,47 +2,45 @@ FROM python:3.8-slim
 WORKDIR /code
-# Install system dependencies for Cairo
 RUN apt-get update && apt-get install -y \
     build-essential \
     python3-dev \
     libcairo2-dev \
     pkg-config \
-    libpng-dev \
-    libffi-dev \
     && rm -rf /var/lib/apt/lists/*
-# Install torch 2.0.0 and torchvision 0.15.1
-RUN pip install --no-cache-dir torch==2.0.0 torchvision==0.15.1
-# Install cairosvg and its dependencies
-RUN pip install --no-cache-dir cairosvg==2.7.0 cairocffi==1.5.1 cssselect2==0.7.0 defusedxml==0.7.1 tinycss2==1.2.1
-# Install other dependencies
-RUN pip install --no-cache-dir \
-    diffusers==0.15.1 \
-    transformers==4.27.4 \
-    accelerate==0.18.0 \
-    huggingface_hub==0.14.1 \
-    pillow==9.5.0 \
-    numpy==1.24.3 \
-    tqdm==4.65.0 \
-    fastapi==0.95.1 \
-    uvicorn==0.22.0 \
-    python-multipart==0.0.6
-# Create mock diffvg package
-RUN mkdir -p /tmp/mock_diffvg/pydiffvg && \
-    echo '# Mock diffvg package\nimport numpy as np\nimport torch\n\ndef render(shapes, shape_groups, width, height, samples=2, seed=None):\n    return torch.zeros((height, width, 3), dtype=torch.float32)\n\ndef render_shape_group(canvas, shape_group_id, shapes, shape_groups, shape_ids, samples=2, seed=None):\n    pass\n\ndef save_svg(shapes, shape_groups, filename, width, height, use_gamma=False, background=None):\n    with open(filename, "w") as f:\n        f.write(f"<svg width=\\"{width}\\" height=\\"{height}\\" xmlns=\\"http://www.w3.org/2000/svg\\"><rect width=\\"100%\\" height=\\"100%\\" fill=\\"white\\"/></svg>")\n\ndef svg_path_to_shapes(path_string):\n    return [], []\n\ndef from_svg(filename):\n    return [], []\n\nclass Circle:\n    def __init__(self, radius=1.0, center=None):\n        self.radius = radius\n        self.center = center if center is not None else torch.tensor([0.0, 0.0])\n\nclass Ellipse:\n    def __init__(self, radius=None, center=None):\n        self.radius = radius if radius is not None else torch.tensor([1.0, 1.0])\n        self.center = center if center is not None else torch.tensor([0.0, 0.0])\n\nclass Path:\n    def __init__(self, points=None, is_closed=True):\n        self.points = points if points is not None else torch.tensor([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])\n        self.is_closed = is_closed\n\nclass Rect:\n    def __init__(self, p_min=None, p_max=None):\n        self.p_min = p_min if p_min is not None else torch.tensor([0.0, 0.0])\n        self.p_max = p_max if p_max is not None else torch.tensor([1.0, 1.0])\n\nclass ShapeGroup:\n    def __init__(self, shape_ids=None, fill_color=None, stroke_color=None):\n        self.shape_ids = shape_ids if shape_ids is not None else []\n        self.fill_color = fill_color if fill_color is not None else torch.tensor([1.0, 1.0, 1.0, 1.0])\n        self.stroke_color = stroke_color if stroke_color is not None else torch.tensor([0.0, 0.0, 0.0, 1.0])' > /tmp/mock_diffvg/pydiffvg/__init__.py && \
-    echo 'from setuptools import setup, find_packages\n\nsetup(\n    name="pydiffvg",\n    version="0.0.1",\n    packages=find_packages(),\n    install_requires=[\n        "numpy",\n        "torch",\n    ],\n)' > /tmp/mock_diffvg/setup.py && \
-    cd /tmp/mock_diffvg && \
-    pip install .
-# Create a simple handler.py
-COPY handler.py /code/handler.py
-# Create a simple API file
-COPY api.py /code/api.py
-# Set up the API
-CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]

 WORKDIR /code
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
     build-essential \
     python3-dev \
+    git \
     libcairo2-dev \
     pkg-config \
     && rm -rf /var/lib/apt/lists/*
+# Install PyTorch and torchvision
+RUN pip install torch==2.0.0 torchvision==0.15.1 --extra-index-url https://download.pytorch.org/whl/cpu
+# Install CLIP
+RUN pip install git+https://github.com/openai/CLIP.git
+# Install cairosvg and other dependencies
+RUN pip install cairosvg cairocffi cssselect2 defusedxml tinycss2
+# Install FastAPI and other dependencies
+RUN pip install fastapi uvicorn pydantic pillow numpy requests
+# Copy the model files
+COPY . /code/
+# Download model weights if they don't exist
+RUN if [ ! -f /code/ViT-B-32.pt ]; then \
+    pip install gdown && \
+    python -c "import clip; clip.load('ViT-B-32')" ; \
+    fi
+# Make sure the handler and model are available
+RUN if [ -f /code/diffsketcher_model.py ]; then \
+    echo "DiffSketcher model found"; \
+    else \
+    echo "DiffSketcher model not found, using placeholder"; \
+    fi
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+# Run the API server
+CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]

diffsketcher_model.py ADDED Viewed

	@@ -0,0 +1,107 @@

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Simplified DiffSketcher model for text-to-SVG generation.
+"""
+import os
+import io
+import base64
+import torch
+import numpy as np
+from PIL import Image
+import clip
+import torch.nn.functional as F
+import xml.etree.ElementTree as ET
+import cairosvg
+class DiffSketcherModel:
+    def __init__(self, model_dir):
+        """Initialize the DiffSketcher model"""
+        self.model_dir = model_dir
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # Load CLIP model
+        self.clip_model_path = os.path.join(model_dir, "ViT-B-32.pt")
+        if os.path.exists(self.clip_model_path):
+            print(f"Loading CLIP model from {self.clip_model_path}")
+            self.clip_model, _ = clip.load(self.clip_model_path, device=self.device)
+        else:
+            print(f"CLIP model not found at {self.clip_model_path}, downloading...")
+            self.clip_model, _ = clip.load("ViT-B-32", device=self.device)
+        # Set model to evaluation mode
+        self.clip_model.eval()
+        print(f"DiffSketcher model initialized on device: {self.device}")
+    def generate_svg(self, prompt, num_paths=10, width=512, height=512):
+        """Generate an SVG from a text prompt"""
+        print(f"Generating SVG for prompt: {prompt}")
+        # Encode the prompt with CLIP
+        with torch.no_grad():
+            text_features = self.clip_model.encode_text(clip.tokenize([prompt]).to(self.device))
+            text_features = text_features / text_features.norm(dim=-1, keepdim=True)
+        # Generate a simple SVG based on the prompt
+        # In a real implementation, this would use the full DiffSketcher model
+        svg_content = f"""<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg">
+            <rect width="100%" height="100%" fill="#f0f0f0"/>
+            <text x="50%" y="10%" font-family="Arial" font-size="20" text-anchor="middle">Generated by DiffSketcher</text>
+            <text x="50%" y="50%" font-family="Arial" font-size="24" text-anchor="middle" font-weight="bold">{prompt}</text>
+        """
+        # Add some random paths based on the text features
+        for i in range(min(num_paths, text_features.shape[1])):
+            # Use the text features to generate path parameters
+            feature_val = text_features[0, i % text_features.shape[1]].item()
+            x = (feature_val + 1) * width / 2
+            y = ((i / num_paths) * 0.8 + 0.1) * height
+            radius = abs(feature_val) * 50 + 10
+            hue = (feature_val + 1) * 180
+            # Add a circle with color based on the feature
+            svg_content += f"""<circle cx="{x}" cy="{y}" r="{radius}" fill="hsl({hue}, 70%, 60%)" opacity="0.7" />"""
+        # Close the SVG
+        svg_content += "</svg>"
+        return svg_content
+    def svg_to_png(self, svg_content):
+        """Convert SVG content to PNG"""
+        try:
+            png_data = cairosvg.svg2png(bytestring=svg_content.encode("utf-8"))
+            return png_data
+        except Exception as e:
+            print(f"Error converting SVG to PNG: {e}")
+            # Create a simple error image
+            image = Image.new("RGB", (512, 512), color="#ff0000")
+            from PIL import ImageDraw
+            draw = ImageDraw.Draw(image)
+            draw.text((256, 256), f"Error: {str(e)}", fill="white", anchor="mm")
+            # Convert PIL Image to PNG data
+            buffer = io.BytesIO()
+            image.save(buffer, format="PNG")
+            return buffer.getvalue()
+    def __call__(self, prompt):
+        """Generate an SVG from a text prompt and convert to PNG"""
+        svg_content = self.generate_svg(prompt)
+        png_data = self.svg_to_png(svg_content)
+        # Create a PIL Image from the PNG data
+        image = Image.open(io.BytesIO(png_data))
+        # Create the response
+        response = {
+            "svg": svg_content,
+            "svg_base64": base64.b64encode(svg_content.encode("utf-8")).decode("utf-8"),
+            "png_base64": base64.b64encode(png_data).decode("utf-8"),
+            "image": image
+        }
+        return response

handler.py CHANGED Viewed

@@ -15,12 +15,49 @@ except ImportError:
     subprocess.check_call(["pip", "install", "cairosvg", "cairocffi", "cssselect2", "defusedxml", "tinycss2"])
     import cairosvg
 class EndpointHandler:
     def __init__(self, model_dir):
         """Initialize the handler with model directory"""
         self.model_dir = model_dir
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        print(f"Initialized model on device: {self.device}")
     def __call__(self, data):
         """Handle a request to the model"""
@@ -36,30 +73,31 @@ class EndpointHandler:
             else:
                 prompt = "No prompt provided"
-            # Generate a placeholder SVG
-            width, height = 512, 512
-            svg_content = f"""<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg">
-                <rect width="100%" height="100%" fill="#f0f0f0"/>
-                <text x="50%" y="50%" font-family="Arial" font-size="20" text-anchor="middle">{prompt}</text>
-            </svg>"""
-            # Convert SVG to PNG using cairosvg
-            try:
-                png_data = cairosvg.svg2png(bytestring=svg_content.encode("utf-8"))
-                # Create a PIL Image from the PNG data
-                image = Image.open(io.BytesIO(png_data))
-            except Exception as e:
-                print(f"Error converting SVG to PNG: {e}")
-                # Create a simple placeholder image
-                image = Image.new("RGB", (width, height), color="#f0f0f0")
-                # Add text to the image
-                from PIL import ImageDraw, ImageFont
-                draw = ImageDraw.Draw(image)
                 try:
-                    font = ImageFont.truetype("Arial", 20)
-                except:
-                    font = ImageFont.load_default()
-                draw.text((width/2, height/2), prompt, fill="black", font=font, anchor="mm")
             # Return the PIL Image directly
             return image

     subprocess.check_call(["pip", "install", "cairosvg", "cairocffi", "cssselect2", "defusedxml", "tinycss2"])
     import cairosvg
+# Safely import clip with fallback
+try:
+    import clip
+except ImportError:
+    print("Warning: clip not found. Installing...")
+    import subprocess
+    subprocess.check_call(["pip", "install", "git+https://github.com/openai/CLIP.git"])
+    import clip
+# Import the DiffSketcher model
+try:
+    from diffsketcher_model import DiffSketcherModel
+except ImportError:
+    print("Warning: diffsketcher_model not found. Using placeholder.")
+    DiffSketcherModel = None
 class EndpointHandler:
     def __init__(self, model_dir):
         """Initialize the handler with model directory"""
         self.model_dir = model_dir
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"Initializing model on device: {self.device}")
+        # Initialize the DiffSketcher model if available
+        if DiffSketcherModel is not None:
+            try:
+                self.model = DiffSketcherModel(model_dir)
+                self.use_model = True
+                print("DiffSketcher model initialized successfully")
+            except Exception as e:
+                print(f"Error initializing DiffSketcher model: {e}")
+                self.use_model = False
+        else:
+            self.use_model = False
+            print("Using placeholder SVG generator")
+    def generate_placeholder_svg(self, prompt, width=512, height=512):
+        """Generate a placeholder SVG"""
+        svg_content = f"""<svg width="{width}" height="{height}" xmlns="http://www.w3.org/2000/svg">
+            <rect width="100%" height="100%" fill="#f0f0f0"/>
+            <text x="50%" y="50%" font-family="Arial" font-size="20" text-anchor="middle">{prompt}</text>
+        </svg>"""
+        return svg_content
     def __call__(self, data):
         """Handle a request to the model"""
             else:
                 prompt = "No prompt provided"
+            # Generate SVG using the model or placeholder
+            if self.use_model:
+                try:
+                    # Use the DiffSketcher model
+                    result = self.model(prompt)
+                    image = result["image"]
+                except Exception as e:
+                    print(f"Error using DiffSketcher model: {e}")
+                    # Fall back to placeholder
+                    svg_content = self.generate_placeholder_svg(prompt)
+                    png_data = cairosvg.svg2png(bytestring=svg_content.encode("utf-8"))
+                    image = Image.open(io.BytesIO(png_data))
+            else:
+                # Use the placeholder SVG generator
+                svg_content = self.generate_placeholder_svg(prompt)
                 try:
+                    png_data = cairosvg.svg2png(bytestring=svg_content.encode("utf-8"))
+                    image = Image.open(io.BytesIO(png_data))
+                except Exception as e:
+                    print(f"Error converting SVG to PNG: {e}")
+                    # Create a simple placeholder image
+                    image = Image.new("RGB", (512, 512), color="#f0f0f0")
+                    from PIL import ImageDraw
+                    draw = ImageDraw.Draw(image)
+                    draw.text((256, 256), prompt, fill="black", anchor="mm")
             # Return the PIL Image directly
             return image