Spaces:

TK156
/

depth-estimation-api

Runtime error

App Files Files Community

TK561 commited on 26 days ago

Commit

7d2a98b

1 Parent(s): 7ef81f1

fix: 実際の深度推定機能を実装

Browse files

- DepthAnything V2モデルの統合
- Base64画像入力のサポート
- API エンドポイント /api/predict の追加
- 必要な依存関係の追加
- メモリ管理の改善

Files changed (2) hide show

app.py +86 -18
requirements.txt +7 -1

app.py CHANGED Viewed

@@ -1,23 +1,91 @@
 import gradio as gr
-def depth_estimation(image):
-    """最もシンプルな深度推定テスト"""
-    if image is None:
-        return None, None
-    # まずは画像をそのまま返すテスト
-    return image, image
-# 最小限のGradio Interface
-demo = gr.Interface(
-    fn=depth_estimation,
-    inputs=gr.Image(type="pil"),
-    outputs=[
-        gr.Image(label="元画像"),
-        gr.Image(label="深度マップ")
-    ],
-    title="深度推定 API",
-    description="テスト中"
-)
-demo.launch()

 import gradio as gr
+import torch
+from transformers import AutoImageProcessor, AutoModelForDepthEstimation
+import numpy as np
+from PIL import Image
+import cv2
+import base64
+import io
+class DepthEstimationAPI:
+    def __init__(self):
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Using device: {self.device}")
+        model_name = "depth-anything/Depth-Anything-V2-Small-hf"
+        self.processor = AutoImageProcessor.from_pretrained(model_name)
+        self.model = AutoModelForDepthEstimation.from_pretrained(model_name)
+        self.model.to(self.device)
+        self.model.eval()
+        print("Model loaded successfully")
+    def predict(self, image_input):
+        """Process image and return depth map"""
+        try:
+            # Handle different input types
+            if isinstance(image_input, str):
+                # Base64 encoded image
+                if image_input.startswith('data:image'):
+                    header, encoded = image_input.split(',', 1)
+                    image_bytes = base64.b64decode(encoded)
+                    image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+                else:
+                    # File path
+                    image = Image.open(image_input).convert('RGB')
+            else:
+                # PIL Image
+                image = image_input.convert('RGB') if hasattr(image_input, 'convert') else image_input
+            # Process image
+            inputs = self.processor(images=image, return_tensors="pt")
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                depth = outputs.predicted_depth.squeeze().cpu().numpy()
+            # Create depth visualization
+            depth_normalized = ((depth - depth.min()) / (depth.max() - depth.min()) * 255).astype(np.uint8)
+            depth_colored = cv2.applyColorMap(depth_normalized, cv2.COLORMAP_VIRIDIS)
+            depth_colored = cv2.cvtColor(depth_colored, cv2.COLOR_BGR2RGB)
+            depth_image = Image.fromarray(depth_colored)
+            # Clean up
+            del inputs, outputs, depth, depth_normalized, depth_colored
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+            return [image, depth_image]
+        except Exception as e:
+            print(f"Error in prediction: {e}")
+            return [None, None]
+# Initialize API
+api = DepthEstimationAPI()
+# Create Gradio interface with API support
+with gr.Blocks() as demo:
+    gr.Markdown("# Depth Estimation API")
+    gr.Markdown("AI-powered depth estimation using DepthAnything V2")
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="pil", label="Upload Image")
+            submit_btn = gr.Button("Generate Depth Map", variant="primary")
+        with gr.Column():
+            output_original = gr.Image(type="pil", label="Original Image")
+            output_depth = gr.Image(type="pil", label="Depth Map")
+    # Define the API endpoint
+    submit_btn.click(
+        fn=api.predict,
+        inputs=input_image,
+        outputs=[output_original, output_depth],
+        api_name="predict"  # This creates the /api/predict endpoint
+    )
+# Launch with proper settings for Hugging Face Spaces
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt CHANGED Viewed

	@@ -1 +1,7 @@
1	- ~~gradio~~

+torch
+torchvision
+transformers
+gradio
+numpy
+opencv-python-headless
+Pillow