Update app.py
Browse files
app.py
CHANGED
|
@@ -422,18 +422,18 @@ def inference(image, audio, video, task_type, instruction):
|
|
| 422 |
instruction = 'which region does the text " {} " describe?'.format(instruction)
|
| 423 |
transform = refcoco_transform
|
| 424 |
cfg = refcoco_cfg
|
| 425 |
-
elif task_type == 'General':
|
| 426 |
task = general_task
|
| 427 |
models = general_models
|
| 428 |
generator = general_generator
|
| 429 |
transform = general_transform
|
| 430 |
cfg = general_cfg
|
| 431 |
-
elif task_type == 'General Video':
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
else:
|
| 438 |
raise NotImplementedError
|
| 439 |
|
|
@@ -476,7 +476,13 @@ examples = [
|
|
| 476 |
['examples/images/ski.jpg', None, None, 'Visual Question Answering', 'what does the woman wearing black do?'],
|
| 477 |
['examples/images/banana.jpg', None, None, 'Visual Grounding', 'the detached banana'],
|
| 478 |
['examples/images/skateboard.jpg', None, None, 'General', 'which region does the text " a yellow bird " describe?'],
|
| 479 |
-
['examples/images/baseball.jpg', None, None, 'General', 'what color is the left car?']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
]
|
| 481 |
|
| 482 |
title = "UnIVAL"
|
|
|
|
| 422 |
instruction = 'which region does the text " {} " describe?'.format(instruction)
|
| 423 |
transform = refcoco_transform
|
| 424 |
cfg = refcoco_cfg
|
| 425 |
+
elif task_type in ['General', 'General Video']:
|
| 426 |
task = general_task
|
| 427 |
models = general_models
|
| 428 |
generator = general_generator
|
| 429 |
transform = general_transform
|
| 430 |
cfg = general_cfg
|
| 431 |
+
# elif task_type == 'General Video':
|
| 432 |
+
# task = general_task
|
| 433 |
+
# models = video_general_models
|
| 434 |
+
# generator = video_general_generator
|
| 435 |
+
# transform = general_transform
|
| 436 |
+
# cfg = video_general_cfg
|
| 437 |
else:
|
| 438 |
raise NotImplementedError
|
| 439 |
|
|
|
|
| 476 |
['examples/images/ski.jpg', None, None, 'Visual Question Answering', 'what does the woman wearing black do?'],
|
| 477 |
['examples/images/banana.jpg', None, None, 'Visual Grounding', 'the detached banana'],
|
| 478 |
['examples/images/skateboard.jpg', None, None, 'General', 'which region does the text " a yellow bird " describe?'],
|
| 479 |
+
['examples/images/baseball.jpg', None, None, 'General', 'what color is the left car?'],
|
| 480 |
+
[None, None, 'examples/videos/video7014.mp4', 'Video Captioning', None],
|
| 481 |
+
[None, None, 'examples/videos/video7017.mp4', 'Video Captioning', None],
|
| 482 |
+
[None, None, 'examples/videos/video7019.mp4', 'Video Captioning', None],
|
| 483 |
+
[None, None, 'examples/videos/video7021.mp4', 'Video Captioning', None],
|
| 484 |
+
[None, 'examples/audios/6cS0FsUM-cQ.wav', None, 'Audio Captioning', None],
|
| 485 |
+
[None, 'examples/audios/AJtNitYMa1I.wav', None, 'Audio Captioning', None],
|
| 486 |
]
|
| 487 |
|
| 488 |
title = "UnIVAL"
|