|
import sys
from typing import List

import cv2
import imageio
import numpy as np
import plotly.graph_objects as go
import torch
from matplotlib import pyplot as plt
from PIL import Image
from scipy.interpolate import splprep, splev
from scipy.spatial import distance_matrix
from scipy.spatial.transform import Rotation as R, Slerp
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture

sys.path.append('./submodules/gaussian-splatting/')
from scene.cameras import Camera
|
|
|
def render_gaussians_rgb(generator3DGS, viewpoint_cam, visualize=False):
    """
    Render the gaussians from generator3DGS as an RGB image seen from viewpoint_cam.

    Args:
        generator3DGS : instance of the Generator3DGS class from the networks.py file
        viewpoint_cam : camera instance
        visualize : boolean flag. If True, also display the image inline with pyplot.

    Returns:
        uint8 numpy array with shape (H, W, 3) representing the image
    """
    with torch.no_grad():
        render_pkg = generator3DGS(viewpoint_cam)
        image = render_pkg["render"]
        # (C, H, W) float tensor -> (H, W, 3) uint8 numpy array
        image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0)
        image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)

        if visualize:
            plt.figure(figsize=(12, 8))
            plt.imshow(image_np)
            plt.show()

        return image_np
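

# Minimal usage sketch (hypothetical names): assumes a trained `generator3DGS`
# and a list of `viewpoint_cams` are already in scope, e.g. loaded from a
# gaussian-splatting Scene. Not run on import.
#
#     frame = render_gaussians_rgb(generator3DGS, viewpoint_cams[0], visualize=False)
#     print(frame.shape, frame.dtype)   # -> (H, W, 3) uint8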
|
|
|
def render_gaussians_D_scores(generator3DGS, viewpoint_cam, mask=None, mask_channel=0, visualize=False):
    """
    Render the D_scores of the gaussians from generator3DGS as colors, seen from viewpoint_cam.

    Args:
        generator3DGS : instance of the Generator3DGS class from the networks.py file
        viewpoint_cam : camera instance
        mask : optional mask to highlight specific gaussians. Must be of shape (N,) where N is the number
            of gaussians in generator3DGS.gaussians, and a torch tensor of floats. Scale it according
            to how much color you want to add; a mask value of 10 is recommended.
        mask_channel : color channel to which the mask is added
        visualize : boolean flag. If True, also display the image inline with pyplot.

    Returns:
        uint8 numpy array with shape (H, W, 3) representing generator3DGS.gaussians.D_scores rendered as colors
    """
    with torch.no_grad():
        # Temporarily overwrite the color features with D_scores. The 1e-4 scaling
        # keeps a trace of the original features so they can be restored below.
        generator3DGS.gaussians._features_dc = generator3DGS.gaussians._features_dc * 1e-4 + \
            torch.stack([generator3DGS.gaussians.D_scores] * 3, dim=-1)
        generator3DGS.gaussians._features_rest = generator3DGS.gaussians._features_rest * 1e-4
        if mask is not None:
            generator3DGS.gaussians._features_dc[..., mask_channel] += mask.unsqueeze(-1)

        render_pkg = generator3DGS(viewpoint_cam)
        image = render_pkg["render"]
        image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0)
        image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)

        if visualize:
            plt.figure(figsize=(12, 8))
            plt.imshow(image_np)
            plt.show()

        # Undo the temporary changes, restoring the original features.
        if mask is not None:
            generator3DGS.gaussians._features_dc[..., mask_channel] -= mask.unsqueeze(-1)
        generator3DGS.gaussians._features_dc = (generator3DGS.gaussians._features_dc -
            torch.stack([generator3DGS.gaussians.D_scores] * 3, dim=-1)) * 1e4
        generator3DGS.gaussians._features_rest = generator3DGS.gaussians._features_rest * 1e4

    return image_np
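

# Usage sketch (hypothetical names and shapes): highlight the 100 gaussians
# with the highest D_scores in red, assuming `generator3DGS` and
# `viewpoint_cams` are in scope and D_scores flattens to shape (N,).
#
#     D = generator3DGS.gaussians.D_scores.squeeze()
#     mask = torch.zeros_like(D)
#     mask[torch.topk(D, k=100).indices] = 10.0   # recommended mask value
#     img = render_gaussians_D_scores(generator3DGS, viewpoint_cams[0],
#                                     mask=mask, mask_channel=0)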
|
|
|
|
|
|
|
def normalize(v):
    """
    Normalize a vector to unit length.

    Parameters:
        v (np.ndarray): Input vector.

    Returns:
        np.ndarray: Unit vector in the same direction as `v`.
    """
    return v / np.linalg.norm(v)
|
|
|
def look_at_rotation(camera_position: np.ndarray, target: np.ndarray, world_up=np.array([0, 1, 0])):
    """
    Compute a rotation matrix for a camera looking at a target point.

    Parameters:
        camera_position (np.ndarray): The 3D position of the camera.
        target (np.ndarray): The point the camera should look at.
        world_up (np.ndarray): A vector that defines the global 'up' direction.
            Must not be parallel to the viewing direction.

    Returns:
        np.ndarray: A 3x3 rotation matrix (camera-to-world) with columns [right, up, forward].
    """
    z_axis = normalize(target - camera_position)    # forward
    x_axis = normalize(np.cross(world_up, z_axis))  # right
    y_axis = np.cross(z_axis, x_axis)               # up
    return np.stack([x_axis, y_axis, z_axis], axis=1)
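

# Self-contained sanity check (numpy only, a sketch rather than a test suite):
# the returned matrix should be orthonormal, with its third column equal to the
# unit vector from the camera toward the target.
def _example_look_at_rotation():
    cam_pos = np.array([0.0, 0.0, -5.0])
    target = np.zeros(3)
    Rot = look_at_rotation(cam_pos, target)
    assert np.allclose(Rot.T @ Rot, np.eye(3), atol=1e-8)   # orthonormal
    assert np.allclose(Rot[:, 2], normalize(target - cam_pos))  # forward column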
|
|
|
|
|
def generate_circular_camera_path(existing_cameras: List[Camera], N: int = 12, radius_scale: float = 1.0, d: float = 2.0) -> List[Camera]:
    """
    Generate a circular path of cameras around an existing camera group,
    with each new camera oriented to look at the average viewing direction.

    Parameters:
        existing_cameras (List[Camera]): List of existing camera objects to estimate average orientation and layout.
        N (int): Number of new cameras to generate along the circular path.
        radius_scale (float): Scale factor to adjust the radius of the circle.
        d (float): Distance ahead of each camera used to estimate its look-at point.

    Returns:
        List[Camera]: A list of newly generated Camera objects forming a circular path and oriented toward a shared view center.
    """
    # Center of the existing camera positions.
    center = np.mean([cam.T for cam in existing_cameras], axis=0)

    # Estimate a shared look-at point d units ahead of each camera.
    look_targets = [cam.T + cam.R[:, 2] * d for cam in existing_cameras]
    center_of_view = np.mean(look_targets, axis=0)

    # Build an orthonormal frame around the average forward direction.
    avg_forward = normalize(np.mean([cam.R[:, 2] for cam in existing_cameras], axis=0))
    up_guess = np.array([0, 1, 0])
    right = normalize(np.cross(avg_forward, up_guess))
    up = normalize(np.cross(right, avg_forward))

    # Circle radius from the average distance to the center.
    avg_radius = np.mean([np.linalg.norm(cam.T - center) for cam in existing_cameras]) * radius_scale

    angles = np.linspace(0, 2 * np.pi, N, endpoint=False)
    reference_cam = existing_cameras[0]
    new_cameras = []

    for i, a in enumerate(angles):
        position = center + avg_radius * (np.cos(a) * right + np.sin(a) * up)

        if d < 1e-5 or radius_scale < 1e-5:
            # Degenerate path: keep the reference orientation.
            R = reference_cam.R.copy()
        else:
            R = look_at_rotation(position, center_of_view)

        new_cameras.append(Camera(
            R=R,
            T=position,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
            invdepthmap=None,
            image_name=f"circular_a={a:.3f}",
            uid=i
        ))

    return new_cameras
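

# Usage sketch (hypothetical names): render a turntable animation, assuming
# `generator3DGS` and the training cameras `viewpoint_cams` are in scope.
#
#     path = generate_circular_camera_path(viewpoint_cams, N=60, radius_scale=1.0, d=2.0)
#     frames = [render_gaussians_rgb(generator3DGS, cam) for cam in path]
#     save_numpy_frames_as_gif(frames, "turntable.gif", duration=50)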
|
|
|
|
|
def save_numpy_frames_as_gif(frames, output_path="animation.gif", duration=100):
    """
    Save a list of RGB NumPy frames as a looping GIF animation.

    Parameters:
        frames (List[np.ndarray]): List of RGB images as uint8 NumPy arrays (shape HxWx3).
        output_path (str): Path to save the output GIF.
        duration (int): Duration per frame in milliseconds.

    Returns:
        None
    """
    pil_frames = [Image.fromarray(f) for f in frames]
    pil_frames[0].save(
        output_path,
        save_all=True,
        append_images=pil_frames[1:],
        duration=duration,
        loop=0
    )
    print(f"GIF saved to: {output_path}")
|
|
|
def center_crop_frame(frame: np.ndarray, crop_fraction: float) -> np.ndarray:
    """
    Crop the central region of the frame by the given fraction.

    Parameters:
        frame (np.ndarray): Input RGB image (H, W, 3).
        crop_fraction (float): Fraction of the original size to retain (e.g., 0.8 keeps 80%).

    Returns:
        np.ndarray: Cropped RGB image.
    """
    if crop_fraction >= 1.0:
        return frame

    h, w, _ = frame.shape
    new_h, new_w = int(h * crop_fraction), int(w * crop_fraction)
    start_y = (h - new_h) // 2
    start_x = (w - new_w) // 2
    return frame[start_y:start_y + new_h, start_x:start_x + new_w, :]
|
|
|
|
|
|
|
def generate_smooth_closed_camera_path(existing_cameras: List[Camera], N: int = 120, d: float = 2.0, s=.25) -> List[Camera]:
    """
    Generate a smooth, closed path interpolating the positions of existing cameras.

    Parameters:
        existing_cameras (List[Camera]): List of existing cameras.
        N (int): Number of points (cameras) to sample along the smooth path.
        d (float): Distance ahead for estimating the center of view.
        s (float): Smoothing factor passed to scipy.interpolate.splprep (0 interpolates exactly).

    Returns:
        List[Camera]: A list of smoothly moving Camera objects along a closed loop.
    """
    positions = np.array([cam.T for cam in existing_cameras])

    # Shared look-at point d units ahead of each camera.
    look_targets = [cam.T + cam.R[:, 2] * d for cam in existing_cameras]
    center_of_view = np.mean(look_targets, axis=0)

    # Close the loop and fit a periodic B-spline through the positions.
    positions = np.vstack([positions, positions[0]])
    tck, u = splprep(positions.T, s=s, per=True)

    u_fine = np.linspace(0, 1, N)
    smooth_path = np.stack(splev(u_fine, tck), axis=-1)

    reference_cam = existing_cameras[0]
    new_cameras = []

    for i, pos in enumerate(smooth_path):
        R = look_at_rotation(pos, center_of_view)
        new_cameras.append(Camera(
            R=R,
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
            invdepthmap=None,
            image_name=f"smooth_path_i={i}",
            uid=i
        ))

    return new_cameras
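

# Standalone sketch of the periodic spline fit used above (SciPy only). The
# circle points are illustrative; the key detail is that the loop is closed by
# repeating the first point, which `per=True` requires.
def _example_periodic_spline():
    theta = np.linspace(0, 2 * np.pi, 8, endpoint=False)
    pts = np.stack([np.cos(theta), np.sin(theta), np.zeros_like(theta)], axis=1)
    pts = np.vstack([pts, pts[0]])  # close the loop, as the function above does
    tck, _ = splprep(pts.T, s=0.25, per=True)
    path = np.stack(splev(np.linspace(0, 1, 50), tck), axis=-1)
    assert path.shape == (50, 3)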
|
|
|
|
|
def save_numpy_frames_as_mp4(frames, output_path="animation.mp4", fps=10, center_crop: float = 1.0):
    """
    Save a list of RGB NumPy frames as an MP4 video with optional center cropping.

    Parameters:
        frames (List[np.ndarray]): List of RGB images as uint8 NumPy arrays (shape HxWx3).
        output_path (str): Path to save the output MP4.
        fps (int): Frames per second for playback speed.
        center_crop (float): Fraction (0 < center_crop <= 1.0) of central region to retain.
            Use 1.0 for no cropping; 0.8 to crop to 80% center region.

    Returns:
        None
    """
    with imageio.get_writer(output_path, fps=fps, codec='libx264', quality=8) as writer:
        for frame in frames:
            cropped = center_crop_frame(frame, center_crop)
            writer.append_data(cropped)
    print(f"MP4 saved to: {output_path}")
|
|
|
|
|
|
|
def put_text_on_image(img: np.ndarray, text: str) -> np.ndarray:
    """
    Draws multiline white text on a copy of the input image, positioned near the bottom
    and around 80% of the image width. Newline characters split the text into multiple lines.

    Args:
        img (np.ndarray): Input image as a (H, W, 3) uint8 numpy array.
        text (str): Text string to draw on the image. Newlines are treated as line breaks.

    Returns:
        np.ndarray: The output image with the text drawn on it.

    Notes:
        - The function automatically adjusts line spacing and prevents text from going outside the image.
        - Text is drawn in white with font scale 1.0 and thickness 2.
    """
    img = img.copy()
    height, width, _ = img.shape

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1.
    color = (255, 255, 255)
    thickness = 2
    line_spacing = 5

    lines = text.split('\n')

    # Keep the text block inside the right edge of the image.
    max_text_width = max(cv2.getTextSize(line, font, font_scale, thickness)[0][0] for line in lines)
    x = int(0.8 * width)
    x = min(x, width - max_text_width - 30)

    # Start high enough that all lines fit above the bottom margin.
    total_text_height = len(lines) * (cv2.getTextSize('A', font, font_scale, thickness)[0][1] + line_spacing)
    y_start = int(height * 0.9) - total_text_height

    for i, line in enumerate(lines):
        y = y_start + i * (cv2.getTextSize(line, font, font_scale, thickness)[0][1] + line_spacing)
        cv2.putText(img, line, (x, y), font, font_scale, color, thickness, cv2.LINE_AA)

    return img
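

# Self-contained demo (a sketch, not part of the pipeline): overlay a
# two-line label on a black test image and display it.
def _example_put_text():
    canvas = np.zeros((256, 512, 3), dtype=np.uint8)
    labeled = put_text_on_image(canvas, "frame 001\nt=0.00s")
    plt.imshow(labeled)
    plt.show()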
|
|
|
|
|
|
|
|
|
def catmull_rom_spline(P0, P1, P2, P3, n_points=20):
    """
    Compute a Catmull-Rom spline segment between P1 and P2.

    P0 and P3 act as tangent controls; the returned segment starts at P1 and ends at P2.
    """
    t = np.linspace(0, 1, n_points)[:, None]

    # Catmull-Rom basis matrix for T = [t^3, t^2, t, 1].
    M = 0.5 * np.array([
        [-1,  3, -3,  1],
        [ 2, -5,  4, -1],
        [-1,  0,  1,  0],
        [ 0,  2,  0,  0]
    ])

    G = np.stack([P0, P1, P2, P3], axis=0)
    T = np.concatenate([t**3, t**2, t, np.ones_like(t)], axis=1)

    return T @ M @ G
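

# Self-contained check of the endpoint property: the segment interpolates P1
# and P2 exactly, while P0 and P3 only shape the tangents.
def _example_catmull_rom():
    P = [np.array([0.0, 0.0, 0.0]), np.array([1.0, 0.0, 0.0]),
         np.array([1.0, 1.0, 0.0]), np.array([0.0, 1.0, 0.0])]
    seg = catmull_rom_spline(*P, n_points=5)
    assert np.allclose(seg[0], P[1]) and np.allclose(seg[-1], P[2])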
|
|
|
def sort_cameras_pca(existing_cameras: List[Camera]):
    """
    Sort cameras along the main PCA axis of their positions.
    """
    positions = np.array([cam.T for cam in existing_cameras])
    pca = PCA(n_components=1)
    scores = pca.fit_transform(positions)
    sorted_indices = np.argsort(scores[:, 0])
    return sorted_indices
|
|
|
def generate_fully_smooth_cameras(existing_cameras: List[Camera],
                                  n_selected: int = 30,
                                  n_points_per_segment: int = 20,
                                  d: float = 2.0,
                                  closed: bool = False) -> List[Camera]:
    """
    Generate a fully smooth camera path using PCA ordering, a global Catmull-Rom spline for positions, and global SLERP for orientations.

    Args:
        existing_cameras (List[Camera]): List of input cameras.
        n_selected (int): Number of cameras to select after sorting.
        n_points_per_segment (int): Number of interpolated points per spline segment.
        d (float): Distance ahead for estimating center of view (currently unused).
        closed (bool): Whether to close the path.

    Returns:
        List[Camera]: List of smoothly moving Camera objects.
    """
    # Order cameras along the dominant PCA axis.
    sorted_indices = sort_cameras_pca(existing_cameras)
    sorted_cameras = [existing_cameras[i] for i in sorted_indices]
    positions = np.array([cam.T for cam in sorted_cameras])

    # Subsample n_selected cameras evenly along the ordering.
    idx = np.linspace(0, len(positions) - 1, n_selected).astype(int)
    sampled_positions = positions[idx]
    sampled_cameras = [sorted_cameras[i] for i in idx]

    # Pad the control points so every segment has four neighbors.
    if closed:
        sampled_positions = np.vstack([sampled_positions[-1], sampled_positions, sampled_positions[0], sampled_positions[1]])
    else:
        sampled_positions = np.vstack([sampled_positions[0], sampled_positions, sampled_positions[-1], sampled_positions[-1]])

    # Catmull-Rom spline through the sampled positions.
    path_positions = []
    for i in range(1, len(sampled_positions) - 2):
        segment = catmull_rom_spline(sampled_positions[i-1], sampled_positions[i], sampled_positions[i+1], sampled_positions[i+2], n_points_per_segment)
        path_positions.append(segment)
    path_positions = np.concatenate(path_positions, axis=0)

    # SLERP the orientations of the sampled cameras along the path.
    rotations = R.from_matrix([cam.R for cam in sampled_cameras])
    key_times = np.linspace(0, 1, len(rotations))
    slerp = Slerp(key_times, rotations)

    query_times = np.linspace(0, 1, len(path_positions))
    interpolated_rotations = slerp(query_times)

    reference_cam = existing_cameras[0]
    smooth_cameras = []

    for i, pos in enumerate(path_positions):
        R_interp = interpolated_rotations[i].as_matrix()

        smooth_cameras.append(Camera(
            R=R_interp,
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
            invdepthmap=None,
            image_name=f"fully_smooth_path_i={i}",
            uid=i
        ))

    return smooth_cameras
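

# Usage sketch (hypothetical names): smooth fly-through from training cameras,
# assuming `generator3DGS` and `viewpoint_cams` are in scope.
#
#     path = generate_fully_smooth_cameras(viewpoint_cams, n_selected=20, n_points_per_segment=15)
#     plot_cameras_and_smooth_path_with_orientation(viewpoint_cams, path)
#     frames = [render_gaussians_rgb(generator3DGS, cam) for cam in path]
#     save_numpy_frames_as_mp4(frames, "flythrough.mp4", fps=30)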
|
|
|
|
|
def plot_cameras_and_smooth_path_with_orientation(existing_cameras: List[Camera], smooth_cameras: List[Camera], scale: float = 0.1):
    """
    Plot input cameras and smooth path cameras with their orientations in 3D.

    Args:
        existing_cameras (List[Camera]): List of original input cameras.
        smooth_cameras (List[Camera]): List of smooth path cameras.
        scale (float): Length of orientation arrows.

    Returns:
        None
    """
    input_positions = np.array([cam.T for cam in existing_cameras])
    smooth_positions = np.array([cam.T for cam in smooth_cameras])

    fig = go.Figure()

    # Input camera positions.
    fig.add_trace(go.Scatter3d(
        x=input_positions[:, 0], y=input_positions[:, 1], z=input_positions[:, 2],
        mode='markers',
        marker=dict(size=4, color='blue'),
        name='Input Cameras'
    ))

    # Smooth path positions.
    fig.add_trace(go.Scatter3d(
        x=smooth_positions[:, 0], y=smooth_positions[:, 1], z=smooth_positions[:, 2],
        mode='lines+markers',
        line=dict(color='red', width=3),
        marker=dict(size=2, color='red'),
        name='Smooth Path Cameras'
    ))

    # Orientation cones (forward = third column of R).
    for cam in existing_cameras:
        origin = cam.T
        forward = cam.R[:, 2]

        fig.add_trace(go.Cone(
            x=[origin[0]], y=[origin[1]], z=[origin[2]],
            u=[forward[0]], v=[forward[1]], w=[forward[2]],
            colorscale=[[0, 'blue'], [1, 'blue']],
            sizemode="absolute",
            sizeref=scale,
            anchor="tail",
            showscale=False,
            name='Input Camera Direction'
        ))

    for cam in smooth_cameras:
        origin = cam.T
        forward = cam.R[:, 2]

        fig.add_trace(go.Cone(
            x=[origin[0]], y=[origin[1]], z=[origin[2]],
            u=[forward[0]], v=[forward[1]], w=[forward[2]],
            colorscale=[[0, 'red'], [1, 'red']],
            sizemode="absolute",
            sizeref=scale,
            anchor="tail",
            showscale=False,
            name='Smooth Camera Direction'
        ))

    fig.update_layout(
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z',
            aspectmode='data'
        ),
        title="Input Cameras and Smooth Path with Orientations",
        margin=dict(l=0, r=0, b=0, t=30)
    )

    fig.show()
|
|
|
|
|
def solve_tsp_nearest_neighbor(points: np.ndarray):
    """
    Solve the TSP approximately with the nearest-neighbor heuristic.

    Args:
        points (np.ndarray): (N, D) array of points.

    Returns:
        List[int]: Approximate visiting order of points (starting at index 0).
    """
    N = points.shape[0]
    dist = distance_matrix(points, points)
    visited = [0]
    unvisited = set(range(1, N))

    while unvisited:
        last = visited[-1]
        next_city = min(unvisited, key=lambda city: dist[last, city])
        visited.append(next_city)
        unvisited.remove(next_city)

    return visited
|
|
|
def solve_tsp_2opt(points: np.ndarray, n_iter: int = 1000) -> np.ndarray:
    """
    Solve the TSP approximately using Nearest Neighbor construction + 2-Opt improvement.

    Args:
        points (np.ndarray): Array of shape (N, D) with points.
        n_iter (int): Maximum number of 2-opt improvement passes.

    Returns:
        np.ndarray: Ordered array of indices.
    """
    n_points = points.shape[0]

    # Greedy nearest-neighbor construction.
    unvisited = list(range(n_points))
    current = unvisited.pop(0)
    path = [current]

    while unvisited:
        dists = np.linalg.norm(points[unvisited] - points[current], axis=1)
        next_idx = unvisited[np.argmin(dists)]
        unvisited.remove(next_idx)
        path.append(next_idx)
        current = next_idx

    def path_length(path):
        return sum(np.linalg.norm(points[path[i]] - points[path[i + 1]]) for i in range(len(path) - 1))

    best_length = path_length(path)
    improved = True

    # First-improvement 2-opt: reverse a sub-tour whenever it shortens the path.
    for _ in range(n_iter):
        if not improved:
            break
        improved = False
        for i in range(1, n_points - 2):
            for j in range(i + 1, n_points):
                if j - i == 1:
                    continue
                new_path = path[:i] + path[i:j][::-1] + path[j:]
                new_length = path_length(new_path)
                if new_length < best_length:
                    path = new_path
                    best_length = new_length
                    improved = True
                    break
            if improved:
                break

    return np.array(path)
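

# Self-contained check: 2-opt starts from the nearest-neighbor path and only
# accepts improvements, so its open-path length can never be worse than plain
# nearest neighbor on the same points.
def _example_tsp():
    rng = np.random.default_rng(0)
    pts = rng.random((20, 2))

    def open_path_length(order):
        return sum(np.linalg.norm(pts[order[i]] - pts[order[i + 1]]) for i in range(len(order) - 1))

    nn_len = open_path_length(solve_tsp_nearest_neighbor(pts))
    opt_len = open_path_length(solve_tsp_2opt(pts))
    assert opt_len <= nn_len + 1e-9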
|
|
|
def generate_fully_smooth_cameras_with_tsp(existing_cameras: List[Camera],
                                           n_selected: int = 30,
                                           n_points_per_segment: int = 20,
                                           d: float = 2.0,
                                           closed: bool = False) -> List[Camera]:
    """
    Generate a fully smooth camera path using TSP ordering, a global Catmull-Rom spline for positions, and global SLERP for orientations.

    Args:
        existing_cameras (List[Camera]): List of input cameras.
        n_selected (int): Number of cameras to select after ordering.
        n_points_per_segment (int): Number of interpolated points per spline segment.
        d (float): Distance ahead for estimating center of view (currently unused).
        closed (bool): Whether to close the path.

    Returns:
        List[Camera]: List of smoothly moving Camera objects.
    """
    positions = np.array([cam.T for cam in existing_cameras])

    # Order cameras with the nearest-neighbor TSP heuristic.
    order = solve_tsp_nearest_neighbor(positions)
    ordered_cameras = [existing_cameras[i] for i in order]
    ordered_positions = positions[order]

    # Subsample n_selected cameras evenly along the ordering.
    idx = np.linspace(0, len(ordered_positions) - 1, n_selected).astype(int)
    sampled_positions = ordered_positions[idx]
    sampled_cameras = [ordered_cameras[i] for i in idx]

    # Pad the control points so every segment has four neighbors.
    if closed:
        sampled_positions = np.vstack([sampled_positions[-1], sampled_positions, sampled_positions[0], sampled_positions[1]])
    else:
        sampled_positions = np.vstack([sampled_positions[0], sampled_positions, sampled_positions[-1], sampled_positions[-1]])

    # Catmull-Rom spline through the sampled positions.
    path_positions = []
    for i in range(1, len(sampled_positions) - 2):
        segment = catmull_rom_spline(sampled_positions[i-1], sampled_positions[i], sampled_positions[i+1], sampled_positions[i+2], n_points_per_segment)
        path_positions.append(segment)
    path_positions = np.concatenate(path_positions, axis=0)

    # SLERP the orientations of the sampled cameras along the path.
    rotations = R.from_matrix([cam.R for cam in sampled_cameras])
    key_times = np.linspace(0, 1, len(rotations))
    slerp = Slerp(key_times, rotations)

    query_times = np.linspace(0, 1, len(path_positions))
    interpolated_rotations = slerp(query_times)

    reference_cam = existing_cameras[0]
    smooth_cameras = []

    for i, pos in enumerate(path_positions):
        R_interp = interpolated_rotations[i].as_matrix()

        smooth_cameras.append(Camera(
            R=R_interp,
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
            invdepthmap=None,
            image_name=f"fully_smooth_path_i={i}",
            uid=i
        ))

    return smooth_cameras
|
|
|
|
|
|
def generate_clustered_smooth_cameras_with_tsp(existing_cameras: List[Camera],
                                               n_selected: int = 30,
                                               n_points_per_segment: int = 20,
                                               d: float = 2.0,
                                               n_clusters: int = 5,
                                               closed: bool = False) -> List[Camera]:
    """
    Generate a fully smooth camera path using GMM clustering + TSP between cluster centers + TSP inside clusters.
    Positions are normalized before clustering and denormalized before generating the final cameras.

    Args:
        existing_cameras (List[Camera]): List of input cameras.
        n_selected (int): Number of cameras to select after ordering.
        n_points_per_segment (int): Number of interpolated points per spline segment.
        d (float): Distance ahead for estimating center of view (currently unused).
        n_clusters (int): Number of GMM clusters.
        closed (bool): Whether to close the path.

    Returns:
        List[Camera]: Smooth path of Camera objects.
    """
    positions = np.array([cam.T for cam in existing_cameras])

    # Normalize positions so clustering is scale-invariant.
    mean_pos = np.mean(positions, axis=0)
    scale_pos = np.std(positions, axis=0)
    scale_pos[scale_pos == 0] = 1.0
    positions_normalized = (positions - mean_pos) / scale_pos

    features = positions_normalized

    # Cluster camera positions with a Gaussian mixture.
    gmm = GaussianMixture(n_components=n_clusters, covariance_type='full', random_state=42)
    cluster_labels = gmm.fit_predict(features)

    clusters = {}
    cluster_centers = []
    valid_cluster_ids = []

    for cluster_id in range(n_clusters):
        cluster_indices = np.where(cluster_labels == cluster_id)[0]
        if len(cluster_indices) == 0:
            continue
        clusters[cluster_id] = cluster_indices
        cluster_centers.append(np.mean(features[cluster_indices], axis=0))
        valid_cluster_ids.append(cluster_id)

    cluster_centers = np.stack(cluster_centers)

    # (Optional variant, disabled: snap each center to its nearest camera
    # position before solving the TSP.)

    # Order the clusters with 2-opt TSP on their centers. cluster_order indexes
    # rows of cluster_centers, so map it back to the GMM labels, which may be
    # sparse if a component ended up empty.
    cluster_order = solve_tsp_2opt(cluster_centers)

    # Inside each cluster, order the cameras with nearest-neighbor TSP.
    final_indices = []
    for center_idx in cluster_order:
        cluster_indices = clusters[valid_cluster_ids[center_idx]]
        cluster_positions = features[cluster_indices]

        if len(cluster_positions) == 1:
            final_indices.append(cluster_indices[0])
            continue

        local_order = solve_tsp_nearest_neighbor(cluster_positions)
        ordered_cluster_indices = cluster_indices[local_order]
        final_indices.extend(ordered_cluster_indices)

    ordered_cameras = [existing_cameras[i] for i in final_indices]
    ordered_positions = positions_normalized[final_indices]

    # Subsample n_selected cameras evenly along the ordering.
    idx = np.linspace(0, len(ordered_positions) - 1, n_selected).astype(int)
    sampled_positions = ordered_positions[idx]
    sampled_cameras = [ordered_cameras[i] for i in idx]

    # Pad the control points so every segment has four neighbors.
    if closed:
        sampled_positions = np.vstack([sampled_positions[-1], sampled_positions, sampled_positions[0], sampled_positions[1]])
    else:
        sampled_positions = np.vstack([sampled_positions[0], sampled_positions, sampled_positions[-1], sampled_positions[-1]])

    # Catmull-Rom spline through the sampled (normalized) positions.
    path_positions = []
    for i in range(1, len(sampled_positions) - 2):
        segment = catmull_rom_spline(sampled_positions[i-1], sampled_positions[i], sampled_positions[i+1], sampled_positions[i+2], n_points_per_segment)
        path_positions.append(segment)
    path_positions = np.concatenate(path_positions, axis=0)

    # Denormalize back to world coordinates.
    path_positions = path_positions * scale_pos + mean_pos

    # SLERP the orientations of the sampled cameras along the path.
    rotations = R.from_matrix([cam.R for cam in sampled_cameras])
    key_times = np.linspace(0, 1, len(rotations))
    slerp = Slerp(key_times, rotations)

    query_times = np.linspace(0, 1, len(path_positions))
    interpolated_rotations = slerp(query_times)

    reference_cam = existing_cameras[0]
    smooth_cameras = []

    for i, pos in enumerate(path_positions):
        R_interp = interpolated_rotations[i].as_matrix()

        smooth_cameras.append(Camera(
            R=R_interp,
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
            invdepthmap=None,
            image_name=f"clustered_smooth_path_i={i}",
            uid=i
        ))

    return smooth_cameras
|
|
|
|
def generate_clustered_path(existing_cameras: List[Camera],
                            n_points_per_segment: int = 20,
                            d: float = 2.0,
                            n_clusters: int = 5,
                            closed: bool = False) -> List[Camera]:
    """
    Generate a smooth camera path using K-Means clustering and TSP on cluster centers.

    Args:
        existing_cameras (List[Camera]): List of input cameras.
        n_points_per_segment (int): Number of interpolated points per spline segment.
        d (float): Distance ahead for estimating center of view (currently unused).
        n_clusters (int): Number of KMeans clusters (zones).
        closed (bool): Whether to close the path.

    Returns:
        List[Camera]: Smooth path of Camera objects.
    """
    positions = np.array([cam.T for cam in existing_cameras])

    # Normalize positions so clustering is scale-invariant.
    mean_pos = np.mean(positions, axis=0)
    scale_pos = np.std(positions, axis=0)
    scale_pos[scale_pos == 0] = 1.0
    positions_normalized = (positions - mean_pos) / scale_pos

    # Cluster positions into zones.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
    cluster_labels = kmeans.fit_predict(positions_normalized)

    cluster_centers = []
    for cluster_id in range(n_clusters):
        cluster_indices = np.where(cluster_labels == cluster_id)[0]
        if len(cluster_indices) == 0:
            continue
        cluster_centers.append(np.mean(positions_normalized[cluster_indices], axis=0))

    cluster_centers = np.stack(cluster_centers)

    # Order the cluster centers with 2-opt TSP.
    cluster_order = solve_tsp_2opt(cluster_centers)
    ordered_centers = cluster_centers[cluster_order]

    # Pad the control points so every segment has four neighbors.
    if closed:
        ordered_centers = np.vstack([ordered_centers[-1], ordered_centers, ordered_centers[0], ordered_centers[1]])
    else:
        ordered_centers = np.vstack([ordered_centers[0], ordered_centers, ordered_centers[-1], ordered_centers[-1]])

    # Catmull-Rom spline through the ordered centers.
    path_positions = []
    for i in range(1, len(ordered_centers) - 2):
        segment = catmull_rom_spline(ordered_centers[i-1], ordered_centers[i], ordered_centers[i+1], ordered_centers[i+2], n_points_per_segment)
        path_positions.append(segment)
    path_positions = np.concatenate(path_positions, axis=0)

    # Denormalize back to world coordinates.
    path_positions = path_positions * scale_pos + mean_pos

    # Keep a constant orientation taken from the reference camera.
    reference_cam = existing_cameras[0]
    default_rotation = R.from_matrix(reference_cam.R)

    smooth_cameras = []

    for i, pos in enumerate(path_positions):
        R_interp = default_rotation.as_matrix()

        smooth_cameras.append(Camera(
            R=R_interp,
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
            invdepthmap=None,
            image_name=f"cluster_path_i={i}",
            uid=i
        ))

    return smooth_cameras
|
|
|
|
|
|
|
|
|
def visualize_image_with_points(image, points):
    """
    Visualize an image with points overlaid on top. This is useful for correspondence visualizations.

    Parameters:
    - image: PIL Image object
    - points: Numpy array of shape [N, 2] containing (x, y) coordinates of points

    Returns:
    - None (displays the visualization)
    """
    img_array = np.array(image)

    fig, ax = plt.subplots(figsize=(7, 7))
    ax.imshow(img_array)
    ax.scatter(points[:, 0], points[:, 1], color='red', marker='o', s=1)
    plt.show()
|
|
|
|
|
def visualize_correspondences(image1, points1, image2, points2):
    """
    Visualize two images concatenated horizontally with key points and correspondences.

    Parameters:
    - image1: PIL Image object (left image)
    - points1: Numpy array of shape [N, 2] containing (x, y) coordinates of key points for image1
    - image2: PIL Image object (right image)
    - points2: Numpy array of shape [N, 2] containing (x, y) coordinates of key points for image2

    Returns:
    - None (displays the visualization)
    """
    # Concatenate the two images side by side.
    concatenated_image = np.concatenate((np.array(image1), np.array(image2)), axis=1)

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(concatenated_image)

    # Key points; right-image x-coordinates are shifted by the left image width.
    ax.scatter(points1[:, 0], points1[:, 1], color='red', marker='o', s=10)
    ax.scatter(points2[:, 0] + image1.width, points2[:, 1], color='blue', marker='o', s=10)

    # Correspondence lines.
    for i in range(len(points1)):
        ax.plot([points1[i, 0], points2[i, 0] + image1.width], [points1[i, 1], points2[i, 1]])

    plt.show()
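

# Self-contained demo (a sketch with synthetic data): two test images with a
# few correspondences shifted by a constant offset.
def _example_visualize_correspondences():
    img1 = Image.fromarray(np.zeros((64, 64, 3), dtype=np.uint8))
    img2 = Image.fromarray(np.full((64, 64, 3), 128, dtype=np.uint8))
    pts1 = np.array([[10.0, 10.0], [40.0, 50.0], [30.0, 20.0]])
    visualize_correspondences(img1, pts1, img2, pts1 + 5.0)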
|
|
|
|