Spaces:
Sleeping
Sleeping
File size: 4,898 Bytes
f3a22a2 8f57761 f3a22a2 fbb543d d08d48c f3a22a2 2839b00 f3a22a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import gradio as gr
import torch
from torchvision import models, transforms
from PIL import Image
import numpy as np
import cv2
# Load the pre-trained DeepLabV3 (ResNet-101) segmentation network once, at
# import time, so every request reuses the same weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the pinned torchvision version before changing.
model = models.segmentation.deeplabv3_resnet101(pretrained=True)
# Switch to evaluation mode for inference.
model.eval()
# Function to perform segmentation
def segment_person(image):
    """Cut the person out of *image* and return the result as an RGBA image.

    The module-level DeepLabV3 ``model`` is run on the image; pixels whose
    predicted class is "person" get alpha 255, all other pixels get alpha 0.

    Args:
        image: A ``PIL.Image.Image`` in any mode. It is converted to RGB
            before inference.

    Returns:
        A ``PIL.Image.Image`` in RGBA mode with the same size as *image*.

    Raises:
        RuntimeError: If *image* is not a PIL image or any processing step
            fails. The underlying exception is attached as ``__cause__``.
    """
    try:
        if not isinstance(image, Image.Image):
            raise ValueError("Invalid image format. Please upload a valid image.")

        # Normalize() below is given three means/stds and the RGBA stacking
        # expects exactly three colour planes, so grayscale, palette and RGBA
        # uploads must be brought to RGB first.
        rgb_image = image.convert("RGB")

        preprocess = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(rgb_image).unsqueeze(0)  # add batch dimension

        with torch.no_grad():
            output = model(input_tensor)['out'][0]
        output_predictions = output.argmax(0)  # per-pixel class index

        person_class = 15  # Person class in DeepLabV3
        mask_np = (output_predictions == person_class).byte().cpu().numpy()
        # cv2.resize takes (width, height); nearest-neighbour keeps the mask binary.
        mask_resized = cv2.resize(
            mask_np,
            (rgb_image.width, rgb_image.height),
            interpolation=cv2.INTER_NEAREST,
        )

        alpha_channel = (mask_resized * 255).astype(np.uint8)
        rgba_image = np.dstack((np.array(rgb_image), alpha_channel))
        # A uint8 (H, W, 4) array is read as RGBA without an explicit mode.
        return Image.fromarray(rgba_image)
    except Exception as e:
        raise RuntimeError(f"Error in segmenting person: {e}") from e
# Function to create stereoscopic images with inserted person
def create_stereo_images(stereo_image, person_image, depth_level, x_position, y_position):
    """Split a side-by-side stereo pair and paste the person into both views.

    Args:
        stereo_image: ``numpy.ndarray`` holding the left and right views side
            by side. Colour data is expected in RGB order, which is what
            ``np.array`` yields for a PIL image; 2-D grayscale arrays are
            accepted as well.
        person_image: ``PIL.Image.Image`` with an alpha channel; the image is
            used as its own paste mask.
        depth_level: One of ``"close"``, ``"medium"`` or ``"far"``. Selects
            the horizontal disparity and the scale applied to the person.
        x_position: Left edge of the person in the left view, in pixels.
        y_position: Top edge of the person in both views, in pixels.

    Returns:
        A ``(left_image, right_image)`` tuple of equally sized RGBA PIL images.

    Raises:
        RuntimeError: If the inputs are invalid or any step fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        if not isinstance(stereo_image, np.ndarray) or stereo_image.ndim not in (2, 3):
            raise ValueError("Invalid stereo image format. Please upload a valid image.")

        disparity_map = {"close": 10, "medium": 1, "far": -10}
        scaling_factors = {"close": 1.2, "medium": 1.0, "far": 0.8}
        if depth_level not in disparity_map:
            raise ValueError("Invalid depth level. Choose from 'close', 'medium', or 'far'.")
        disparity = disparity_map[depth_level]
        person_scale = scaling_factors[depth_level]

        midpoint = stereo_image.shape[1] // 2
        if midpoint == 0:
            raise ValueError("Invalid stereo image format. Please upload a valid image.")
        left_half = stereo_image[:, :midpoint]
        # Stop at 2 * midpoint so an odd total width does not leave the right
        # view one column wider than the left (Image.merge needs equal sizes).
        right_half = stereo_image[:, midpoint:2 * midpoint]

        # The array is already RGB (it comes from a PIL image), so convert via
        # PIL; a BGR->RGBA conversion here would swap the red and blue planes.
        left_image = Image.fromarray(left_half).convert("RGBA")
        right_image = Image.fromarray(right_half).convert("RGBA")

        # Clamp to 1 px so a tiny cut-out never resizes to an empty image.
        scaled_size = (
            max(1, int(person_image.width * person_scale)),
            max(1, int(person_image.height * person_scale)),
        )
        person_image = person_image.resize(scaled_size, Image.LANCZOS)

        # paste() needs integer coordinates; UI sliders may deliver floats.
        left_x, left_y = int(x_position), int(y_position)
        right_x = left_x + disparity
        left_image.paste(person_image, (left_x, left_y), person_image)
        right_image.paste(person_image, (right_x, left_y), person_image)
        return left_image, right_image
    except Exception as e:
        raise RuntimeError(f"Error in creating stereo images: {e}") from e
# Function to create an anaglyph image
def create_anaglyph(left_image, right_image):
    """Merge a stereo pair into one RGB anaglyph image.

    The red channel comes from *left_image*; the green and blue channels come
    from *right_image*. Both inputs are converted to RGB before splitting.

    Args:
        left_image: PIL image for the left view.
        right_image: PIL image for the right view.

    Returns:
        The merged RGB ``PIL.Image.Image``.

    Raises:
        RuntimeError: If conversion, splitting or merging fails.
    """
    try:
        left_bands = left_image.convert("RGB").split()
        right_bands = right_image.convert("RGB").split()
        channels = (left_bands[0], right_bands[1], right_bands[2])
        return Image.merge("RGB", channels)
    except Exception as e:
        raise RuntimeError(f"Error in creating anaglyph image: {e}")
# Gradio interface function
def process_images(person_image, stereo_image, depth_level, x_position, y_position):
    """Gradio callback: build the anaglyph from the uploaded images.

    Segments the person, pastes the cut-out into both halves of the stereo
    background and merges the two views into one anaglyph.

    Args:
        person_image: PIL image containing the person, or ``None`` if nothing
            was uploaded.
        stereo_image: PIL side-by-side stereo background, or ``None`` if
            nothing was uploaded.
        depth_level: ``"close"``, ``"medium"`` or ``"far"``.
        x_position: Horizontal paste position in pixels.
        y_position: Vertical paste position in pixels.

    Returns:
        The anaglyph as a PIL image.

    Raises:
        gr.Error: On missing uploads or any processing failure. The output is
            an image component, so a returned error string would be treated
            as an image path; raising ``gr.Error`` shows the message in the UI.
    """
    if person_image is None:
        raise gr.Error("Please upload a person image.")
    if stereo_image is None:
        raise gr.Error("Please upload a stereo background image.")

    try:
        person_cutout = segment_person(person_image)
        stereo_array = np.array(stereo_image)
        left_image, right_image = create_stereo_images(
            stereo_array, person_cutout, depth_level, x_position, y_position
        )
        return create_anaglyph(left_image, right_image)
    except Exception as e:
        raise gr.Error(f"An error occurred: {e}") from e
# Gradio app setup
with gr.Blocks() as demo:
    gr.Markdown("# 3D Anaglyph Image Creator")
    gr.Markdown("Upload a person image, select depth, adjust position, and create a 3D anaglyph image.")

    # NOTE(review): the original indentation was lost; the row is assumed to
    # hold only the two upload widgets — confirm against the intended layout.
    with gr.Row():
        person_image = gr.Image(label="Upload Person Image", type="pil")
        stereo_image = gr.Image(label="Upload Stereo Background Image", type="pil")

    depth_level = gr.Radio(["close", "medium", "far"], label="Select Depth Level")
    x_position = gr.Slider(0, 2000, step=1, label="X Position")
    y_position = gr.Slider(0, 2000, step=1, label="Y Position")
    output_image = gr.Image(label="Generated Anaglyph Image")
    generate_button = gr.Button("Generate Anaglyph Image")

    # Inputs are passed to process_images in this order; its result fills output_image.
    generate_button.click(
        process_images,
        [person_image, stereo_image, depth_level, x_position, y_position],
        output_image,
    )

    # Add reminder text below the button
    gr.Markdown("**Reminder:** After adjusting the depth level or position, click the 'Generate Anaglyph Image' button to update the image.")

# Launch the app
demo.launch()