File size: 4,898 Bytes
f3a22a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f57761
f3a22a2
 
 
 
 
 
fbb543d
d08d48c
f3a22a2
 
 
 
 
 
 
 
 
2839b00
 
f3a22a2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
import torch
from torchvision import models, transforms
from PIL import Image
import numpy as np
import cv2

# Build the pre-trained DeepLabV3 (ResNet-101) segmentation network once, at
# import time, and switch it to inference mode so every request reuses it.
model = models.segmentation.deeplabv3_resnet101(pretrained=True)
model.eval()

# Function to perform segmentation
def segment_person(image):
    """Cut the person out of *image* and return them on a transparent background.

    The image is run through the module-level DeepLabV3 ``model``. Pixels whose
    highest-scoring class is 15 (the person class) become fully opaque; every
    other pixel gets an alpha of 0.

    Args:
        image: A ``PIL.Image.Image`` in any mode. It is converted to RGB
            before inference.

    Returns:
        An RGBA ``PIL.Image.Image`` with the same width and height as *image*.

    Raises:
        RuntimeError: If *image* is not a PIL image or any processing step
            fails. The underlying exception is attached as ``__cause__``.
    """
    try:
        if not isinstance(image, Image.Image):
            raise ValueError("Invalid image format. Please upload a valid image.")

        # Normalize below is configured for exactly three channels, and the
        # alpha mask is stacked onto a 3-channel array at the end, so force
        # RGB up front (uploads may be RGBA, grayscale, palette, ...).
        rgb_image = image.convert("RGB")

        preprocess = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        input_tensor = preprocess(rgb_image).unsqueeze(0)  # add batch dimension

        with torch.no_grad():
            output = model(input_tensor)['out'][0]

        person_class = 15  # Person class in DeepLabV3
        # Per-pixel winning class, reduced to a 0/1 mask for "person".
        mask_np = (output.argmax(0) == person_class).byte().cpu().numpy()
        # Nearest-neighbour keeps the mask binary if its size ever differs
        # from the input image.
        mask_resized = cv2.resize(
            mask_np,
            (rgb_image.width, rgb_image.height),
            interpolation=cv2.INTER_NEAREST,
        )

        alpha_channel = (mask_resized * 255).astype(np.uint8)
        rgba_image = np.dstack((np.array(rgb_image), alpha_channel))
        return Image.fromarray(rgba_image, 'RGBA')

    except Exception as e:
        raise RuntimeError(f"Error in segmenting person: {e}") from e

# Function to create stereoscopic images with inserted person
def create_stereo_images(stereo_image, person_image, depth_level, x_position, y_position):
    """Paste a segmented person into both halves of a side-by-side stereo image.

    The stereo image is split down the middle into a left and a right view.
    The person is scaled according to *depth_level* and pasted into both
    views; in the right view the x coordinate is offset by the disparity
    associated with *depth_level*.

    Args:
        stereo_image: ``numpy.ndarray`` of shape (height, width, channels)
            in RGB channel order, holding the left and right views side by
            side.
        person_image: RGBA ``PIL.Image.Image``; it is also used as its own
            paste mask, so its alpha channel decides which pixels are copied.
        depth_level: One of ``"close"``, ``"medium"`` or ``"far"``.
        x_position: Left edge of the person in the left view, in pixels.
        y_position: Top edge of the person in both views, in pixels.

    Returns:
        A ``(left_image, right_image)`` tuple of equally sized RGBA
        ``PIL.Image.Image`` objects.

    Raises:
        RuntimeError: If the inputs are invalid or any processing step
            fails. The underlying exception is attached as ``__cause__``.
    """
    try:
        if not isinstance(stereo_image, np.ndarray):
            raise ValueError("Invalid stereo image format. Please upload a valid image.")

        disparity_map = {"close": 10, "medium": 1, "far": -10}
        scaling_factors = {"close": 1.2, "medium": 1.0, "far": 0.8}

        if depth_level not in disparity_map:
            raise ValueError("Invalid depth level. Choose from 'close', 'medium', or 'far'.")

        _, width, _ = stereo_image.shape
        midpoint = width // 2
        # For an odd width the trailing column is dropped so both views have
        # identical sizes; merging their channels later requires that.
        left_half = stereo_image[:, :midpoint]
        right_half = stereo_image[:, midpoint:2 * midpoint]

        # The array is in RGB order (it is built from a PIL image by the
        # caller), so only an alpha channel is appended; a BGR conversion
        # here would swap red and blue in the background.
        left_image = Image.fromarray(cv2.cvtColor(left_half, cv2.COLOR_RGB2RGBA))
        right_image = Image.fromarray(cv2.cvtColor(right_half, cv2.COLOR_RGB2RGBA))

        disparity = disparity_map[depth_level]
        person_scale = scaling_factors[depth_level]
        # Clamp to 1 px so a tiny cut-out never asks resize for a zero size.
        scaled_size = (
            max(1, int(person_image.width * person_scale)),
            max(1, int(person_image.height * person_scale)),
        )
        person_image = person_image.resize(scaled_size, Image.LANCZOS)

        # UI sliders may deliver floats; paste needs integer pixel offsets.
        left_x, left_y = int(x_position), int(y_position)
        right_x = left_x + disparity

        left_image.paste(person_image, (left_x, left_y), person_image)
        right_image.paste(person_image, (right_x, left_y), person_image)

        return left_image, right_image

    except Exception as e:
        raise RuntimeError(f"Error in creating stereo images: {e}") from e

# Function to create an anaglyph image
def create_anaglyph(left_image, right_image):
    """Merge a stereo pair into a single anaglyph image.

    The result takes its red channel from *left_image* and its green and
    blue channels from *right_image*.

    Args:
        left_image: ``PIL.Image.Image`` for the left view.
        right_image: ``PIL.Image.Image`` for the right view.

    Returns:
        An RGB ``PIL.Image.Image``.

    Raises:
        RuntimeError: If channel extraction or merging fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        # Convert first so inputs with an alpha channel split into exactly
        # three bands.
        left_r, _, _ = left_image.convert("RGB").split()
        _, right_g, right_b = right_image.convert("RGB").split()
        return Image.merge("RGB", (left_r, right_g, right_b))
    except Exception as e:
        raise RuntimeError(f"Error in creating anaglyph image: {e}") from e

# Gradio interface function
def process_images(person_image, stereo_image, depth_level, x_position, y_position):
    """Gradio callback: build an anaglyph with the person inserted into the scene.

    Segments the person out of *person_image*, pastes the cut-out into both
    halves of *stereo_image*, and merges the two views into one anaglyph.

    Args:
        person_image: PIL image containing the person to insert.
        stereo_image: PIL image holding the left and right views side by side.
        depth_level: One of ``"close"``, ``"medium"`` or ``"far"``.
        x_position: Left edge of the person in the left view, in pixels.
        y_position: Top edge of the person in both views, in pixels.

    Returns:
        The anaglyph as a ``PIL.Image.Image``.

    Raises:
        gr.Error: If an upload is missing or any processing step fails, so
            the message is shown to the user in the Gradio UI.
    """
    try:
        if person_image is None or stereo_image is None:
            raise ValueError("Please upload both a person image and a stereo background image.")

        person_cutout = segment_person(person_image)
        stereo_array = np.array(stereo_image)

        left_image, right_image = create_stereo_images(
            stereo_array, person_cutout, depth_level, x_position, y_position
        )
        return create_anaglyph(left_image, right_image)
    except Exception as e:
        # The output component is an image: a returned string would be
        # treated as an image path rather than shown as a message, so
        # surface the failure through Gradio's error mechanism instead.
        raise gr.Error(f"An error occurred: {e}") from e


# Gradio app setup
with gr.Blocks() as demo:
    gr.Markdown("# 3D Anaglyph Image Creator")
    gr.Markdown("Upload a person image, select depth, adjust position, and create a 3D anaglyph image.")

    # Both uploads sit in one row and reach the callback as PIL images.
    with gr.Row():
        person_image = gr.Image(type="pil", label="Upload Person Image")
        stereo_image = gr.Image(type="pil", label="Upload Stereo Background Image")

    # Placement controls for the inserted person.
    depth_level = gr.Radio(choices=["close", "medium", "far"], label="Select Depth Level")
    x_position = gr.Slider(minimum=0, maximum=2000, step=1, label="X Position")
    y_position = gr.Slider(minimum=0, maximum=2000, step=1, label="Y Position")

    output_image = gr.Image(label="Generated Anaglyph Image")

    # Nothing is recomputed until the button is pressed.
    generate_button = gr.Button(value="Generate Anaglyph Image")
    generate_button.click(
        fn=process_images,
        inputs=[person_image, stereo_image, depth_level, x_position, y_position],
        outputs=output_image,
    )
    # Add reminder text below the button
    gr.Markdown("**Reminder:** After adjusting the depth level or position, click the 'Generate Anaglyph Image' button to update the image.")

# Launch the app
demo.launch()