# VTON360/src/render_from_thuman/render_multiview_images.py
import math
import os
import json
import pickle
from pathlib import Path

import cv2
import numpy as np
from tqdm import tqdm

import taichi as ti  # needed for ti.init / ti.cuda below
import taichi_three as t3
from taichi_three.transform import *

# Allow duplicate OpenMP runtimes (taichi and OpenCV can both load libomp,
# which otherwise aborts with "OMP: Error #15" on some systems).
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
def extr2tf_mat(extr):
    """
    Convert a (3, 4) world-to-camera extrinsic into a (4, 4) camera-to-world
    matrix in the Nerfstudio/OpenGL convention.
    """
    # flip the y and z axes of the camera coordinate frame, according to
    # [ns-data conventions](https://docs.nerf.studio/quickstart/data_conventions.html#camera-view-space)
    trans_mat = np.eye(4)
    trans_mat[1, 1] = -1
    trans_mat[2, 2] = -1
    tf_mat = np.stack([extr[0], extr[1], extr[2], [0, 0, 0, 1]], axis=0)
    tf_mat = trans_mat @ tf_mat
    tf_mat = np.linalg.inv(tf_mat)  # W2C -> C2W
    return tf_mat
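
# Sanity check of the convention above (a sketch, not executed as part of the
# pipeline): for the identity extrinsic the camera sits at the world origin,
# and the returned C2W matrix is just the axis flip itself:
#
#   extr2tf_mat(np.eye(4)[:3])  # -> np.diag([1., -1., -1., 1.])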
def intr2dict(intr: np.ndarray):
    """Pack a (3, 3) pinhole intrinsic matrix into Nerfstudio-style per-frame fields."""
    intr_dict = {
        # cast to plain floats so json.dump never chokes on numpy scalars
        "fl_x": float(intr[0, 0]),
        "fl_y": float(intr[1, 1]),
        "cx": float(intr[0, 2]),
        "cy": float(intr[1, 2]),
    }
    return intr_dict
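
# For the default 1024x1024 setup below (fx = fy = 0.8 * 1024, principal point
# shifted down by 25 px), and assuming export_intrinsic mirrors the values
# passed to set_intrinsic, this yields e.g.:
#   {"fl_x": 819.2, "fl_y": 819.2, "cx": 512.0, "cy": 537.0}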
def save(data_id, save_path, extrs, intrs, depths, imgs, masks):
instance_path = os.path.join(save_path, data_id)
img_save_path = os.path.join(save_path, data_id, 'images')
depth_save_path = os.path.join(save_path, data_id, 'depth')
mask_save_path = os.path.join(save_path, data_id, 'mask')
Path(img_save_path).mkdir(exist_ok=True, parents=True)
Path(mask_save_path).mkdir(exist_ok=True, parents=True)
Path(depth_save_path).mkdir(exist_ok=True, parents=True)
h, w = imgs[0].shape[:2]
transforms_dict = {
"w": w,
"h": h,
"k1": 0, "k2": 0, "p1": 0, "p2": 0,
"camera_model": "OPENCV",
}
frames = []
num_views = len(extrs)
for pid in range(num_views):
        # nyw note: depth is normalized to [0, 1] in taichi_three, so scale it
        # back to [0, 2^15] before writing as 16-bit PNG
        depth = depths[pid] * 2.0 ** 15
        cv2.imwrite(os.path.join(depth_save_path, f'{pid:03d}.png'), depth.astype(np.uint16))
        img = (np.clip(imgs[pid], 0, 1) * 255.0 + 0.5).astype(np.uint8)[:, :, ::-1]  # RGB -> BGR for cv2
        cv2.imwrite(os.path.join(img_save_path, f'{pid:03d}.jpg'), img)
        mask = (np.clip(masks[pid], 0, 1) * 255.0 + 0.5).astype(np.uint8)
        cv2.imwrite(os.path.join(mask_save_path, f'{pid:03d}.png'), mask[:, :, 0])  # single channel is enough
frame_dict = {
"file_path": f"images/{pid:03d}.jpg",
"mask_path": f"mask/{pid:03d}.png",
"depth_file_path": f"depth/{pid:03d}.png",
"transform_matrix": extr2tf_mat(extrs[pid]).tolist(),
}
intr_dict = intr2dict(intrs[pid])
frame_dict.update(intr_dict)
frames.append(frame_dict)
transforms_dict["frames"] = frames
with open(os.path.join(instance_path, "transforms.json"), "w") as f:
json.dump(transforms_dict, f, indent=4)
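
# For reference, the emitted transforms.json follows the Nerfstudio layout,
# roughly (values illustrative only):
#
#   {
#     "w": 1024, "h": 1024,
#     "k1": 0, "k2": 0, "p1": 0, "p2": 0,
#     "camera_model": "OPENCV",
#     "frames": [
#       {"file_path": "images/000.jpg", "mask_path": "mask/000.png",
#        "depth_file_path": "depth/000.png",
#        "transform_matrix": [[...], [...], [...], [...]],
#        "fl_x": 819.2, "fl_y": 819.2, "cx": 512.0, "cy": 537.0},
#       ...
#     ]
#   }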
def save_normal(data_id, save_path, depths, normals):
normal_save_path = os.path.join(save_path, data_id, 'normals')
depth_save_path = os.path.join(save_path, data_id, 'depth_smpls')
Path(normal_save_path).mkdir(exist_ok=True, parents=True)
Path(depth_save_path).mkdir(exist_ok=True, parents=True)
num_views = len(depths)
for pid in range(num_views):
        # nyw note: depth is normalized to [0, 1] in taichi_three, so scale it
        # back to [0, 2^15] before writing as 16-bit PNG
        depth = depths[pid] * 2.0 ** 15
        cv2.imwrite(os.path.join(depth_save_path, f'{pid:03d}.png'), depth.astype(np.uint16))
        normal = (np.clip(normals[pid], 0, 1) * 255.0 + 0.5).astype(np.uint8)[:, :, ::-1]  # RGB -> BGR for cv2
        cv2.imwrite(os.path.join(normal_save_path, f'{pid:03d}.jpg'), normal)
class StaticRenderer:
    """Thin wrapper around a taichi_three scene whose buffers can grow on demand."""
    def __init__(self):
        ti.init(arch=ti.cuda, device_memory_fraction=0.8)
        self.scene = t3.Scene()
        self.N = 10  # current buffer capacity (max vertices/faces/texcoords per model)
    def change_all(self):
        # taichi fields are fixed-size, so growing the capacity means
        # re-initialising the runtime and rebuilding the whole scene from the
        # saved obj/tex data of every model
        save_obj = []
        save_tex = []
        for model in self.scene.models:
            save_obj.append(model.init_obj)
            save_tex.append(model.init_tex)
        ti.init(arch=ti.cuda, device_memory_fraction=0.8)
        print('re-initialising taichi scene')
        self.scene = t3.Scene()
        for i in range(len(save_obj)):
            model = t3.StaticModel(self.N, obj=save_obj[i], tex=save_tex[i])
            self.scene.add_model(model)
    def check_update(self, obj):
        temp_n = self.N
        self.N = max(obj['vi'].shape[0], self.N)
        self.N = max(obj['f'].shape[0], self.N)
        if obj['vt'] is not None:
            self.N = max(obj['vt'].shape[0], self.N)
        if self.N > temp_n:
            # double the capacity to amortise future rebuilds, then rebuild the
            # scene (which also re-creates cameras and lights)
            self.N *= 2
            self.change_all()
            self.camera_light()
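    # Growth example: N starts at 10, so the first real mesh (say 50k vertices)
    # raises N to 50000, doubles it to 100000, and triggers a full rebuild via
    # change_all() plus camera/light re-creation via camera_light().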
def add_model(self, obj, tex=None):
self.check_update(obj)
model = t3.StaticModel(self.N, obj=obj, tex=tex)
self.scene.add_model(model)
    def modify_model(self, index, obj, tex=None):
        # reuse an existing model slot instead of re-allocating taichi fields
        self.check_update(obj)
        self.scene.models[index].init_obj = obj
        self.scene.models[index].init_tex = tex
        self.scene.models[index]._init()
    def camera_light(self):
        camera = t3.Camera(res=(1024, 1024))
        self.scene.add_camera(camera)
        camera_hr = t3.Camera(res=(2048, 2048))  # high-res camera for 2k renders
        self.scene.add_camera(camera_hr)
        # six directional white lights spread around the y axis, each with a
        # random pitch in [-30, 30] degrees
        light_dir = np.array([0, 0, 1])
        light_list = []
        for li in range(6):
            rotate = np.matmul(rotationX(math.radians(np.random.uniform(-30, 30))),
                               rotationY(math.radians(360 // 6 * li)))
            direction = [*np.matmul(rotate, light_dir)]
            light = t3.Light(direction, color=[1.0, 1.0, 1.0])
            light_list.append(light)
        lights = t3.Lights(light_list)
        self.scene.add_lights(lights)
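
# Minimal usage sketch for StaticRenderer (assumes a CUDA device; the mesh
# path is hypothetical and the snippet is not executed here):
#
#   renderer = StaticRenderer()
#   obj = t3.readobj('/path/to/mesh.obj', scale=1)
#   renderer.add_model(obj)        # grows buffers, adds cameras and lights
#   renderer.scene.render()
#   img = renderer.scene.cameras[0].img.to_numpy()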
def render_data(renderer, data_path, phase, data_id, save_path, cam_nums, res, dis=1.0, is_thuman=False, is_smpl_model=False, seed_value=0):
np.random.seed(seed_value)
    if not is_smpl_model:
        obj_path = os.path.join(data_path, phase, data_id, '%s.obj' % data_id)
        texture_path = data_path
        img_path = os.path.join(texture_path, phase, data_id, 'material0.jpeg')
        texture = cv2.imread(img_path)[:, :, ::-1]  # BGR -> RGB
        # ################ nyw: optional equalizeHist for the texture ################
        # # uncomment the following lines to enable equalizeHist on the texture
        # texture = cv2.cvtColor(texture, cv2.COLOR_RGB2HSV)
        # texture[:, :, 2] = cv2.equalizeHist(texture[:, :, 2]) * 0.85  # scale brightness down by 0.85
        # texture = cv2.cvtColor(texture, cv2.COLOR_HSV2RGB)
        # ############################################################################
        texture = np.ascontiguousarray(texture)
        texture = texture.swapaxes(0, 1)[:, ::-1, :]  # image (row, col) -> texture (u, v)
else:
# obj_path = '/data1/hezijian/Thuman2.1_GPS/0000.obj'
obj_path = '/data1/hezijian/Thuman2.1/THuman2.0_Smpl_X_Paras/%s/mesh_smplx.obj' % data_id
    obj = t3.readobj(obj_path, scale=1)
    # height normalization: rescale the scan to a plausible human height and
    # drop its feet to y = 0
    vy_max = np.max(obj['vi'][:, 1])
    vy_min = np.min(obj['vi'][:, 1])
    human_height = 1.80 + np.random.uniform(-0.05, 0.05)  # scalar, not a (1,) array
    obj['vi'][:, :3] = obj['vi'][:, :3] / (vy_max - vy_min) * human_height
    obj['vi'][:, 1] -= np.min(obj['vi'][:, 1])
    look_at_center = np.array([0, 0.85, 0])
    ################ nyw: multiple pitches for better reconstruction ################
    # pitch -8 was the original single-ring setup; the extra +/-45 rings and the
    # two polar views (+/-90) cover the top and bottom of the scan
    cam_pitchs = [-8, 45, -45, 90, -90]
    cam_nums_for_each_pitch = [cam_nums, cam_nums // 2, cam_nums // 2, 1, 1]
    ##################################################################################
    # randomly move the scan in the horizontal plane (smaller range for wide
    # scans so they stay inside the camera ring)
    move_range = 0.1 if human_height < 1.80 else 0.05
    delta_x = np.max(obj['vi'][:, 0]) - np.min(obj['vi'][:, 0])
    delta_z = np.max(obj['vi'][:, 2]) - np.min(obj['vi'][:, 2])
    if delta_x > 1.0 or delta_z > 1.0:
        move_range = 0.01
    obj['vi'][:, 0] += np.random.uniform(-move_range, move_range)
    obj['vi'][:, 2] += np.random.uniform(-move_range, move_range)
if len(renderer.scene.models) >= 1:
if not is_smpl_model:
renderer.modify_model(0, obj, texture)
else:
renderer.modify_model(0, obj)
else:
if not is_smpl_model:
renderer.add_model(obj, texture)
else:
renderer.add_model(obj)
    angle_base = 0.0  # default azimuth when no orientation normalization is needed
    if is_thuman:
        # THuman scans need their global orientation normalised: read the
        # SMPL-X global_orient and use its y component as the base azimuth
        smpl_path = os.path.join(data_path, 'THuman2.0_Smpl_X_Paras', data_id, 'smplx_param.pkl')
        with open(smpl_path, 'rb') as f:
            smpl_para = pickle.load(f)
        y_orient = smpl_para['global_orient'][0][1]
        angle_base = y_orient * 180.0 / np.pi
    # nyw note: render every view for this instance in the loop below
    def render(dis, angle, look_at_center, pitch, renderer, render_2k=False, render_normal=False):
        # place the camera on a sphere of radius `dis` around look_at_center
        ori_vec = np.array([0, 0, dis])
        rotate = np.matmul(rotationY(math.radians(angle)), rotationX(math.radians(pitch)))
        fwd = np.matmul(rotate, ori_vec)
        cam_pos = look_at_center + fwd
        # shift the principal point down by 25 px so the body stays centred
        x_min = 0
        y_min = -25
        cx = res[0] * 0.5
        cy = res[1] * 0.5
        fx = res[0] * 0.8
        fy = res[1] * 0.8
        _cx = cx - x_min
        _cy = cy - y_min
        renderer.scene.cameras[0].set_intrinsic(fx, fy, _cx, _cy)
        renderer.scene.cameras[0].set(pos=cam_pos, target=look_at_center)
        renderer.scene.cameras[0]._init()
        if render_2k:
            # mirror the intrinsics at 2x scale on the high-res camera
            fx = res[0] * 0.8 * 2
            fy = res[1] * 0.8 * 2
            _cx = (res[0] * 0.5 - x_min) * 2
            _cy = (res[1] * 0.5 - y_min) * 2
            renderer.scene.cameras[1].set_intrinsic(fx, fy, _cx, _cy)
            renderer.scene.cameras[1].set(pos=cam_pos, target=look_at_center)
            renderer.scene.cameras[1]._init()
            renderer.scene.render()
            camera = renderer.scene.cameras[0]
            camera_hr = renderer.scene.cameras[1]
            extrinsic = camera.export_extrinsic()
            intrinsic = camera.export_intrinsic()
            depth_map = camera.zbuf.to_numpy().swapaxes(0, 1)
            img = camera.img.to_numpy().swapaxes(0, 1)
            img_hr = camera_hr.img.to_numpy().swapaxes(0, 1)
            mask = camera.mask.to_numpy().swapaxes(0, 1)
            return extrinsic, intrinsic, depth_map, img, mask, img_hr
        renderer.scene.render()
        camera = renderer.scene.cameras[0]
        extrinsic = camera.export_extrinsic()
        intrinsic = camera.export_intrinsic()
        depth_map = camera.zbuf.to_numpy().swapaxes(0, 1)
        if not render_normal:
            img = camera.img.to_numpy().swapaxes(0, 1)
        else:
            img = camera.normal_map.to_numpy().swapaxes(0, 1)
        mask = camera.mask.to_numpy().swapaxes(0, 1)
        return extrinsic, intrinsic, depth_map, img, mask

    extrs, intrs, depths, imgs, masks, normals = [], [], [], [], [], []
    for ci, cam_pitch in enumerate(cam_pitchs):
        # hoist the per-ring view count out of the loop instead of the original
        # walrus assignment, which shadowed the cam_nums parameter
        views_this_pitch = cam_nums_for_each_pitch[ci]
        degree_interval = 360 / views_this_pitch
        for pid in range(views_this_pitch):
            angle = angle_base + pid * degree_interval
if not is_smpl_model:
extr, intr, depth, img, mask = render(dis, angle, look_at_center, cam_pitch, renderer)
extrs.append(extr)
intrs.append(intr)
depths.append(depth)
imgs.append(img)
masks.append(mask)
else:
extr, intr, depth, img, mask = render(dis, angle, look_at_center, cam_pitch, renderer, render_normal=True)
depths.append(depth)
normals.append(img)
if not is_smpl_model:
save(data_id, save_path, extrs, intrs, depths, imgs, masks)
else:
save_normal(data_id, save_path, depths, normals)
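
# After both passes, each instance directory under save_path looks like this
# (a sketch of the layout produced by save() and save_normal()):
#
#   <save_path>/<data_id>/
#     transforms.json          # Nerfstudio-style cameras + per-frame intrinsics
#     images/000.jpg ...       # textured renders
#     mask/000.png ...         # foreground masks
#     depth/000.png ...        # 16-bit depth from the textured pass
#     normals/000.jpg ...      # SMPL-X normal maps
#     depth_smpls/000.png ...  # 16-bit depth from the SMPL-X pass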
if __name__ == '__main__':
    cam_nums = 80
    scene_radius = 2.0  # camera distance from look_at_center
    res = (1024, 1024)
    thuman_root = '/PATH/TO/Thuman2.1/'
    save_root = '/PATH/TO/OUTPUT'
    renderer = StaticRenderer()
    # for phase in ['train', 'val']:
    phase = 'all'
    thuman_list = sorted(os.listdir(os.path.join(thuman_root, phase)))
    thuman_list = thuman_list[512:513]  # restrict to a single scan; widen this slice to render more
    save_path = os.path.join(save_root, phase)
    seed_value = np.random.randint(1, 1000)
    for data_id in tqdm(thuman_list):
        # the same seed is passed to both passes so the random height/offset of
        # the scan matches between the textured render and the SMPL-X render
        render_data(renderer, thuman_root, phase, data_id, save_path, cam_nums, res, dis=scene_radius, is_thuman=True, seed_value=seed_value)
        render_data(renderer, thuman_root, phase, data_id, save_path, cam_nums, res, dis=scene_radius, is_thuman=True, is_smpl_model=True, seed_value=seed_value)
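
# With cam_nums = 80 and the pitch schedule above, each pass renders
# 80 + 40 + 40 + 1 + 1 = 162 views per instance.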