Spaces:

qihang
/

BerfScene

Paused

App Files Community

3v324v23 committed on Apr 14, 2024

Commit

8b73ab4

1 Parent(s): cf373a3

update

Browse files

Files changed (5) hide show

.gitignore +2 -0
Dockerfile +3 -0
app.py +124 -204
requirements.txt +1 -0
test.py +60 -12

.gitignore CHANGED Viewed

@@ -1,2 +1,4 @@
 *.pyc
 *.pth

 *.pyc
 *.pth
+*.whl
+*.mp4

Dockerfile CHANGED Viewed

@@ -38,6 +38,9 @@ RUN pip install --no-cache-dir --upgrade -r requirements.txt
 RUN wget https://www.dropbox.com/scl/fi/105qy7mkqfjcmnfd3tmv0/edit.pth?rlkey=qcd67cdrqz4jra0p3er966iuk -O clevr.pth
 ENV TORCH_EXTENSIONS_DIR=/home/user/.cache

 RUN wget https://www.dropbox.com/scl/fi/105qy7mkqfjcmnfd3tmv0/edit.pth?rlkey=qcd67cdrqz4jra0p3er966iuk -O clevr.pth
+RUN wget https://www.dropbox.com/scl/fi/k5qc5y5rmhuru5eztegbn/gradio_draggable-0.0.1-py3-none-any.whl
+RUN pip install gradio_draggable-0.0.1-py3-none-any.whl
 ENV TORCH_EXTENSIONS_DIR=/home/user/.cache

app.py CHANGED Viewed

@@ -1,17 +1,20 @@
-print('start!', flush=True)
 import gradio as gr
 from models import build_model
 from PIL import Image
 import numpy as np
 import torchvision
 import ninja
 import torch
 from tqdm import trange
 import imageio
 import requests
 import argparse
-print('load!', flush=True)
 checkpoint = 'clevr.pth'
 state = torch.load(checkpoint, map_location='cpu')
 G = build_model(**state['model_kwargs_init']['generator_smooth'])
@@ -23,7 +26,25 @@ G_kwargs= dict(noise_mode='const',
                 fused_modulate=False,
                 impl='cuda',
                 fp16_res=None)
-print('load finish', flush=True)
 def trans(x, y, z, length):
     w = h = length
@@ -31,8 +52,29 @@ def trans(x, y, z, length):
     y = 0.5 * h - 128 + (y/9 + .5) * 256
     z = z / 9 * 256
     return x, y, z
-def get_bev_from_objs(objs, length=256, scale = 6):
-    h, w = length, length *scale
     nc = 14
     canvas = np.zeros([h, w, nc])
     xx = np.ones([h,w]).cumsum(0)
@@ -57,216 +99,94 @@ def get_bev_from_objs(objs, length=256, scale = 6):
             mask = ((xx-x)**2 + (y-yy)**2) ** 0.5 <= z
         canvas[mask] = feat
     canvas = np.transpose(canvas, [2, 0, 1]).astype(np.float32)
-    rotate_angle = 0
-    canvas = torchvision.transforms.functional.rotate(torch.tensor(canvas), rotate_angle).numpy()
     return canvas
-# COLOR_NAME_LIST = ['cyan', 'green', 'purple', 'red', 'yellow', 'gray', 'brown', 'blue']
-COLOR_NAME_LIST = ['cyan', 'green', 'purple', 'red', 'yellow', 'gray', 'purple', 'blue']
-SHAPE_NAME_LIST = ['cube', 'sphere', 'cylinder']
-MATERIAL_NAME_LIST = ['rubber', 'metal']
-xy_lib = dict()
-xy_lib['B'] = [
-    [-2, -1],
-    [-1, -1],
-    [-2, 0],
-    [-2, 1],
-    [-1, .5],
-    [0, 1],
-    [0, 0],
-    [0, -1],
-    [0, 2],
-    [-1, 2],
-    [-2, 2]
-]
-xy_lib['B'] = [
-    [-2.5, 1.25],
-    [-2, 2],
-    [-2, 0.5],
-    [-2, -0.75],
-    [-1, -1],
-    [-1, 2],
-    [-1, 0],
-    [-1, 2],
-    [0, 1],
-    [0, 0],
-    [0, -1],
-    [0, 2],
-    # [-1, 2],
-]
-xy_lib['B'] = [
-    [-2.5, 1.25],
-    [-2, 2],
-    [-2, 0.5],
-    [-2, -1],
-    [-1, -1.25],
-    [-1, 2],
-    [-1, 0],
-    [-1, 2],
-    [0, 1],
-    [0, 0],
-    [0, -1.25],
-    [0, 2],
-    # [-1, 2],
-]
-xy_lib['R'] = [
-    [0, -1],
-    [0, 0],
-    [0, 1],
-    [0, 2],
-    [-1, -1],
-    # [-1, 2],
-    [-2, -1],
-    [-2, 0],
-    [-2.25, 2],
-    [-1, 1]
-]
-xy_lib['C'] = [
-    [0, -1],
-    [0, 0],
-    [0, 1],
-    [0, 2],
-    [-1, -1],
-    [-1, 2],
-    [-2, -1],
-    # [-2, .5],
-    [-2, 2],
-    # [-1, .5]
-]
-xy_lib['s'] = [
-    [0, -1],
-    [0, 0],
-    [0, 2],
-    [-1, -1],
-    [-1, 2],
-    [-2, -1],
-    [-2, 1],
-    [-2, 2],
-    [-1, .5]
-]
-xy_lib['F'] = [
-    [0, -1],
-    [0, 0],
-    [0, 1],
-    [0, 2],
-    [-1, -1],
-    # [-1, 2],
-    [-2, -1],
-    [-2, .5],
-    # [-2, 2],
-    [-1, .5]
-]
-xy_lib['c'] = [
-    [0.8,1],
-    # [-0.8,1],
-    [0,0.1],
-    [0,1.9],
-]
-xy_lib['e'] = [
-    [0, -1],
-    [0, 0],
-    [0, 1],
-    [0, 2],
-    [-1, -1],
-    [-1, 2],
-    [-2, -1],
-    [-2, .5],
-    [-2, 2],
-    [-1, .5]
-]
-xy_lib['n'] = [
-    [0,1],
-    [0,-1],
-    [0,0.1],
-    [0,1.9],
-    [-1,0],
-    [-2,1],
-    [-3,-1],
-    [-3,1],
-    [-3,0.1],
-    [-3,1.9],
-]
-offset_x = dict(B=4, R=4, C=4, F=4, c=3, s=4, e=4, n=4.8)
-s = 'BeRFsCene'
-objs = []
-offset = 2
-for idx, c in enumerate(s):
-    xy = xy_lib[c]
-    color = np.random.choice(COLOR_NAME_LIST)
-    for i in range(len(xy)):
-        # while 1:
-        #     is_ok = 1
-        #     x, y =
-        #     for prev_x, prev_y in zip(xpool, ypool):
-        x, y = xy[i]
-        y *= 1.5
-        y -= 0.5
-        x -= offset
-        z = 0.35
-        # if idx<4:
-        #     color = np.random.choice(COLOR_NAME_LIST[:-1])
-        # else:
-        #     color = 'blue'
-        shape = 'cube'
-        material = 'rubber'
-        rot = 0
-        objs.append([x, y, z,  shape, color, material, rot])
-    offset += offset_x[c]
-Image.fromarray((255 * .8 - get_bev_from_objs(objs)[0] *.8 * 255).astype(np.uint8))
-batch_size = 1
-code = torch.randn(1, G.z_dim).cuda()
-to_pil = torchvision.transforms.ToPILImage()
-large_bevs = torch.tensor(get_bev_from_objs(objs)).cuda()[None]
-bevs = large_bevs[..., 0: 0+256]
-RT = torch.tensor([[ -1.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.5000,  -0.8660,
-          10.3923,   0.0000,  -0.8660,  -0.5000,   6.0000,   0.0000,   0.0000,
-           0.0000,   1.0000, 262.5000,   0.0000,  32.0000,   0.0000, 262.5000,
-          32.0000,   0.0000,   0.0000,   1.0000]], device='cuda')
-print('prepare finish', flush=True)
-def predict(name):
-    print('inference', name, flush=True)
     gen = G(code, RT, bevs)
     rgb = gen['gen_output']['image'][0] * .5 + .5
-    print('inference', name, flush=True)
     return to_pil(rgb)
-    # to_pil(rgb).save('tmp.png')
-    # save_path = '/mnt/petrelfs/zhangqihang/code/3d-scene-gen/tmp.png'
-    # return [save_path]
-URL = "https://source.unsplash.com/random/500x500/?nature,fruit"
-def refresh(name):
-    image = Image.open(requests.get(URL, stream=True).raw)
-    return image
 with gr.Blocks() as demo:
-    gr.HTML(
         """
-        BerfScene demo
-        """)
-    # gallery = gr.Image(show_label=False)
-    image = gr.Image(show_label=False)
-    btn = gr.Button("Result")
-    x = gr.Textbox(label="Prompt", show_label=False, lines=1, max_lines=1, info="Describe your subject (optional)", value="a person", elem_id="prompt")
-    btn.click(fn=predict, inputs=x, outputs=image)
-    # demo.load(fn=refresh, inputs=x, outputs=gallery, show_progress=False, every=1)
-        # btn.click(fn=predict, inputs=num_frames, outputs=gallery, postprocess=False)
 parser = argparse.ArgumentParser()
 parser.add_argument('--port', type=int, help='The port number', default=7860)

 import gradio as gr
 from models import build_model
 from PIL import Image
 import numpy as np
 import torchvision
+import math
 import ninja
 import torch
 from tqdm import trange
 import imageio
 import requests
 import argparse
+import imageio
+from scipy.spatial.transform import Rotation
+from gradio_draggable import Draggable
 checkpoint = 'clevr.pth'
 state = torch.load(checkpoint, map_location='cpu')
 G = build_model(**state['model_kwargs_init']['generator_smooth'])
                 fused_modulate=False,
                 impl='cuda',
                 fp16_res=None)
+print('prepare finish', flush=True)
+COLOR_NAME_LIST = ['cyan', 'green', 'purple', 'red', 'yellow', 'gray', 'purple', 'blue']
+SHAPE_NAME_LIST = ['cube', 'sphere', 'cylinder']
+MATERIAL_NAME_LIST = ['rubber', 'metal']
+canvas_x = 800
+canvas_y = 200
+batch_size = 1
+code = torch.randn(1, G.z_dim).cuda()
+to_pil = torchvision.transforms.ToPILImage()
+RT = torch.tensor([[ -1.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.5000,  -0.8660,
+          10.3923,   0.0000,  -0.8660,  -0.5000,   6.0000,   0.0000,   0.0000,
+           0.0000,   1.0000, 262.5000,   0.0000,  32.0000,   0.0000, 262.5000,
+          32.0000,   0.0000,   0.0000,   1.0000]], device='cuda')
+obj_dict = {}
 def trans(x, y, z, length):
     w = h = length
     y = 0.5 * h - 128 + (y/9 + .5) * 256
     z = z / 9 * 256
     return x, y, z
+def objs_to_canvas(lst, length=256, scale = 2.6):
+    objs = []
+    for each in lst:
+        x, y, obj_id = each['x'], each['y'], each['id']
+        if obj_id not in obj_dict:
+            color = np.random.choice(COLOR_NAME_LIST)
+            shape = 'cube'
+            material = 'rubber'
+            rot = 0
+            obj_dict[obj_id] = [color, shape, material, rot]
+        color, shape, material, rot = obj_dict[obj_id]
+        x = -x / canvas_x * 16
+        y = y / canvas_y * 2
+        y *= 2
+        x += 1.0
+        y -= 1.5
+        z = 0.35
+        objs.append([x, y, z, shape, color, material, rot])
+    h, w = length, int(length *scale)
     nc = 14
     canvas = np.zeros([h, w, nc])
     xx = np.ones([h,w]).cumsum(0)
             mask = ((xx-x)**2 + (y-yy)**2) ** 0.5 <= z
         canvas[mask] = feat
     canvas = np.transpose(canvas, [2, 0, 1]).astype(np.float32)
     return canvas
+@torch.no_grad()
+def predict_local_view(lst):
+    canvas = torch.tensor(objs_to_canvas(lst)).cuda()[None]
+    bevs = canvas[..., 0: 0+256]
     gen = G(code, RT, bevs)
     rgb = gen['gen_output']['image'][0] * .5 + .5
     return to_pil(rgb)
+@torch.no_grad()
+def predict_local_view_video(lst):
+    canvas = torch.tensor(objs_to_canvas(lst)).cuda()[None]
+    bevs = canvas[..., 0: 0+256]
+    RT_array = np.array(RT[0].cpu())
+    rot = RT_array[:16].reshape(4,4)
+    trans = RT_array[16:]
+    rot_new = rot.copy()
+    r = Rotation.from_matrix(rot[:3, :3])
+    angles = r.as_euler("zyx",degrees=True)
+    v_mean, h_mean = angles[1], angles[2]
+    writer = imageio.get_writer('tmp.mp4', fps=25)
+    for t in np.linspace(0, 1, 50):
+        angles[1] = 0.5 * np.cos(t * 2 * math.pi) + v_mean
+        angles[2] = 1 * np.sin(t * 2 * math.pi) + h_mean
+        r = Rotation.from_euler("zyx",angles,degrees=True)
+        rot_new[:3,:3] = r.as_matrix()
+        new_RT = torch.tensor(np.concatenate([rot_new.flatten(), trans])[None]).cuda().float()
+        gen = G(code, new_RT, bevs)
+        rgb = gen['gen_output']['image'][0] * .5 + .5
+        writer.append_data(np.array(to_pil(rgb)))
+    writer.close()
+    return 'tmp.mp4'
+@torch.no_grad()
+def predict_global_view(lst):
+    canvas = torch.tensor(objs_to_canvas(lst)).cuda()[None]
+    length = canvas.shape[-1]
+    lines = []
+    for i in trange(0, length - 256, 10):
+        bevs = canvas[..., i: i+256]
+        gen = G(code, RT, bevs)
+        start = 128 if i > 0 else 0
+        lines.append(gen['gen_output']['image'][0, ..., start:128+32])
+    rgb = torch.cat(lines, 2)*.5+.5
+    return to_pil(rgb)
 with gr.Blocks() as demo:
+    gr.Markdown(
+            """
+            # BerfScene: Bev-conditioned Equivariant Radiance Fields for Infinite 3D Scene Generation
+            Qihang Zhang, Yinghao Xu, Yujun Shen, Bo Dai, Bolei Zhou*, Ceyuan Yang* (*Corresponding Author)<br>
+            [Arxiv Report](https://arxiv.org/abs/2312.02136) | [Project Page](https://zqh0253.github.io/BerfScene/) | [Github](https://github.com/zqh0253/BerfScene)
+            """
+        )
+    gr.Markdown(
         """
+        ### Quick Start
+        1. Drag and place objects in the canvas.
+        2. Click `Add object` to insert object into the canvas.
+        3. Click `Reset` to clean the canvas.
+        4. Click `Get local view` to synthesize local 3D scenes.
+        5. Click `Get global view` to synthesize global 3D scenes.
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            drag = Draggable()
+            with gr.Row():
+                submit_btn_local = gr.Button("Get local view", variant='primary')
+                submit_btn_global = gr.Button("Get global view", variant='primary')
+        with gr.Column():
+            with gr.Row():
+                single_view_image = gr.Image(label='single view', interactive=False)
+                single_view_video = gr.Video(label='mutli-view', interactive=False, autoplay=True)
+            global_view_image = gr.Image(label='global view', interactive=False)
+    submit_btn_local.click(fn=predict_local_view, inputs=drag, outputs=single_view_image)
+    submit_btn_local.click(fn=predict_local_view_video, inputs=drag, outputs=single_view_video)
+    submit_btn_global.click(fn=predict_global_view, inputs=drag, outputs=global_view_image)
 parser = argparse.ArgumentParser()
 parser.add_argument('--port', type=int, help='The port number', default=7860)

requirements.txt CHANGED Viewed

@@ -19,4 +19,5 @@ lmdb
 matplotlib
 einops
 imageio
 gradio

 matplotlib
 einops
 imageio
+imageio-ffmpeg
 gradio

test.py CHANGED Viewed

@@ -1,18 +1,66 @@
 import gradio as gr
-import requests
-from PIL import Image
-URL = "https://source.unsplash.com/random/500x500/?nature,fruit"
-def refresh():
-    image = Image.open(requests.get(URL, stream=True).raw)
-    return image
-with gr.Blocks() as blocks:
-    image = gr.Image(show_label=False)
-    blocks.load(fn=refresh, inputs=None, outputs=image,
-                show_progress=False, every=1)
-blocks.queue(api_open=False)
-blocks.launch(server_name='0.0.0.0', server_port=10093)

 import gradio as gr
+def update_position(data):
+    # data will be the position of the rectangle, expected to be a JSON string
+    return data  # Here you can parse and use the position data as needed
+html_code = """
+<div id="canvas-container"></div>
+<script>
+document.getElementById('canvas-container').innerHTML = `
+  <canvas id="canvas" width="500" height="500"></canvas>
+`;
+const canvas = document.getElementById('canvas');
+const ctx = canvas.getContext('2d');
+const rect = { x: 50, y: 50, width: 100, height: 50, isDragging: false };
+function draw() {
+    ctx.clearRect(0, 0, canvas.width, canvas.height);
+    ctx.fillStyle = 'blue';
+    ctx.fillRect(rect.x, rect.y, rect.width, rect.height);
+}
+function sendData() {
+    GradioApp.send({x: rect.x, y: rect.y});
+}
+function mouseDown(e) {
+    if (e.offsetX >= rect.x && e.offsetX <= rect.x + rect.width &&
+        e.offsetY >= rect.y && e.offsetY <= rect.y + rect.height) {
+      rect.isDragging = true;
+    }
+}
+function mouseMove(e) {
+    if (rect.isDragging) {
+      rect.x = e.offsetX - rect.width / 2;
+      rect.y = e.offsetY - rect.height / 2;
+      draw();
+      sendData();
+    }
+}
+function mouseUp() {
+    rect.isDragging = false;
+    sendData();
+}
+canvas.addEventListener('mousedown', mouseDown);
+canvas.addEventListener('mousemove', mouseMove);
+canvas.addEventListener('mouseup', mouseUp);
+draw();
+</script>
+"""
+interface = gr.Interface(
+    fn=update_position,
+    inputs=gr.HTML(),
+    outputs="json",
+    allow_flagging="never",
+    live=True
+)
+interface.launch(server_name='0.0.0.0', server_port=7860)