|
|
|
import os |
|
import glob |
|
import json |
|
import tqdm |
|
import natsort |
|
import random |
|
|
|
from PIL import Image |
|
|
|
import numpy as np |
|
|
|
import torch |
|
from torch.utils.data import Dataset, DataLoader |
|
|
|
import clip |
|
|
|
from torchvision import models |
|
|
|
from config import config |
|
|
|
|
|
class loading_img(Dataset):
    """Dataset that opens image files and returns them transformed.

    Args:
        img_list: Sequence of image file paths; item ``idx`` is read from
            ``img_list[idx]``.
        transform: Optional callable applied to the opened ``PIL.Image``.
            When omitted, the module-level ``preprocess`` is looked up at
            item-access time, which is what this class did before the
            parameter existed, so existing ``loading_img(img_list)`` callers
            are unaffected.
    """

    def __init__(self, img_list, transform=None):
        self.img_list = img_list
        self.transform = transform

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        # Resolve the fallback lazily: the global ``preprocess`` is bound
        # further down the file, after this class has been defined.
        transform = preprocess if self.transform is None else self.transform
        # Pillow opens files lazily; the ``with`` block guarantees the file
        # handle is released once the transform has consumed the image.
        with Image.open(self.img_list[idx]) as img:
            return transform(img)
|
|
|
|
|
|
|
def select_frames(folder_list, preprocess, resnet18_pretrained, *,
                  num_picks=18, batch_size=64, num_workers=16):
    """Group the frames of each folder by feature similarity and pick frames.

    For every folder, the ``*.jpg`` files are loaded in natural sort order
    through ``loading_img`` and passed through ``resnet18_pretrained`` in a
    single forward pass. A pairwise mean-absolute-difference matrix between
    the frame features then drives a greedy grouping:

    1. The first remaining frame becomes the anchor.
    2. Its distances to all other remaining frames are soft-maxed.
    3. Frames whose weight is below
       ``mean - std * exp(1 - iteration / config.divlam)`` join the anchor's
       group and are removed from the remaining set.
    4. Repeat until five or fewer frames remain (those are left ungrouped).

    From each group at most ``num_picks`` frames are kept (a random subset,
    drawn with the ``random`` module, when the group is larger).

    Args:
        folder_list: Iterable of directories containing ``*.jpg`` frames.
        preprocess: Not used by this function's body; kept so existing
            callers keep working.
        resnet18_pretrained: Feature extractor (``torch.nn.Module``). Inputs
            are moved to the device of its parameters. Its output is assumed
            to be 2-D, ``(frames, features)``.
        num_picks: Maximum number of frames kept per group.
        batch_size: Batch size used while loading the images.
        num_workers: Number of ``DataLoader`` worker processes.

    Returns:
        ``(out_frames, output_results)``: ``out_frames`` is a flat list of
        the selected file paths; ``output_results`` is a list of groups,
        each a list of frame indices (positions in that folder's sorted
        file list) with the anchor index last. Both are empty when no
        folder yields a group. Results of several folders are concatenated.
    """
    # Bound before the loop so an empty ``folder_list`` returns empty results
    # instead of failing at ``return``, and so that several folders
    # accumulate rather than the last one overwriting the others.
    out_frames = []
    output_results = []

    for folder in folder_list:
        img_list = natsort.natsorted(glob.glob(f"{folder}/*.jpg"))
        if not img_list:
            # Nothing to load; torch.concat() would reject an empty list.
            continue

        img_loader = DataLoader(
            loading_img(img_list),
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
        )
        # Follow the model's own device rather than a module-level global.
        model_device = next(resnet18_pretrained.parameters()).device
        img_feats = torch.concat(list(img_loader), dim=0).to(model_device)

        with torch.no_grad():
            featuremap = resnet18_pretrained(img_feats)
            frame_num = featuremap.shape[0]
            # Row i: mean absolute feature difference between frame i and
            # every frame (abs(x) is what sqrt(x ** 2) computed).
            dist_list = torch.stack(
                [(featuremap - img_feat).abs().mean(dim=-1) for img_feat in featuremap]
            ).reshape(frame_num, frame_num)

        idx_list = list(range(frame_num))
        loop_idx = 0

        while len(idx_list) > 5:
            dist_idx = idx_list.pop(0)  # anchor frame of this group

            data = dist_list[dist_idx, idx_list].softmax(dim=-1)
            mu, std = torch.mean(data), torch.std(data)
            # The margin below the mean shrinks as loop_idx grows.
            threshold = mu - std * (np.exp(1 - loop_idx / config.divlam))
            # Positions (within idx_list) of the frames joining the anchor.
            pop_idx_list = torch.where(data < threshold)[0].detach().cpu().numpy()

            result = [idx_list[pos] for pos in pop_idx_list]
            result.append(dist_idx)
            output_results.append(result)

            # Only touch the RNG when the group really has to be thinned.
            if len(result) > num_picks:
                chosen = random.sample(result, num_picks)
            else:
                chosen = result
            out_frames.extend(img_list[i] for i in sorted(chosen))

            loop_idx += 1

            # Drop the grouped frames from the remaining set.
            grouped = set(pop_idx_list.tolist())
            idx_list = [v for pos, v in enumerate(idx_list) if pos not in grouped]

    return out_frames, output_results
|
|
|
|
|
|
|
# Seed the RNG that select_frames uses for sub-sampling large groups.
random.seed(10)

device = "cuda" if torch.cuda.is_available() else "cpu"

# ResNet-18 as a feature extractor: the classifier head and the average
# pooling are both replaced by Identity, so the network no longer emits
# class logits.
resnet18_pretrained = models.resnet18(pretrained=True).to(device)
resnet18_pretrained.fc = torch.nn.Identity()
resnet18_pretrained.avgpool = torch.nn.Identity()
resnet18_pretrained.eval()

# Only the image transform returned by CLIP is used in this section.
model, preprocess = clip.load("ViT-B/32", device=device)

# Both inputs are JSON-lines files. Read them inside ``with`` blocks so the
# handles are closed, and skip blank lines (e.g. a trailing newline).
with open(os.path.expanduser(config.q_path), "r") as q_file:
    questions = [json.loads(line) for line in q_file if line.strip()]
with open(os.path.expanduser(config.f_path), "r") as f_file:
    objs_acts = [json.loads(line) for line in f_file if line.strip()]

# Index the object/activity records by q_uid once instead of scanning the
# whole list for every question. setdefault keeps the FIRST record per
# q_uid, matching the former "break on first match" scan.
objs_acts_by_uid = {}
for objs_act in objs_acts:
    objs_acts_by_uid.setdefault(objs_act['q_uid'], objs_act)

answer_path = os.path.expanduser(config.a_path)
answer_dir = os.path.dirname(answer_path)
if answer_dir:
    # os.makedirs("") raises, so only create a directory when there is one.
    os.makedirs(answer_dir, exist_ok=True)
# NOTE(review): this handle is not closed within this section; close it (or
# switch to a ``with`` block) once the end of the script is confirmed.
ans_file = open(answer_path, "w")

# NOTE(review): select_frames starts DataLoader worker processes; on
# platforms that spawn workers the PyTorch docs ask for this script body to
# sit under ``if __name__ == "__main__":`` -- confirm the target platform.
output_results = []
for question in tqdm.tqdm(questions):
    # Stays True when no object/activity record matches this question; such
    # questions are not written to the answer file.
    test_token = True

    objs_act = objs_acts_by_uid.get(question['q_uid'])
    if objs_act is not None:
        # NOTE(review): 'Object' is filled from the "Activity" field, same
        # as 'Activity' below -- looks like a copy-paste slip; confirm
        # whether objs_act["Object"] was intended before changing it.
        question['Object'] = objs_act["Activity"]
        question['Activity'] = objs_act["Activity"]

        # The trailing slash makes glob match only an existing directory.
        folder_list = glob.glob(f"{config.img_folder}/{question['q_uid']}/")
        out_frames, output_result = select_frames(folder_list, preprocess, resnet18_pretrained)
        output_results.append(output_result)
        question['filepath'] = out_frames

        ans_file.write(json.dumps(question) + "\n")
        test_token = False