hezhihui
committed on
Commit
·
c8d5c00
1
Parent(s):
85ffc0b
adapt for coming batch input
Browse files
- config.json +1 -0
- image_processing_minicpmv.py +1 -1
- processing_minicpmv.py +4 -4
config.json
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_name_or_path": "openbmb/MiniCPM-V-2",
|
|
|
|
| 3 |
"architectures": [
|
| 4 |
"MiniCPMV"
|
| 5 |
],
|
|
|
|
| 1 |
{
|
| 2 |
"_name_or_path": "openbmb/MiniCPM-V-2",
|
| 3 |
+
"version": 2.0,
|
| 4 |
"architectures": [
|
| 5 |
"MiniCPMV"
|
| 6 |
],
|
image_processing_minicpmv.py
CHANGED
|
@@ -401,7 +401,7 @@ class MiniCPMVImageProcessor(BaseImageProcessor):
|
|
| 401 |
tgt_sizes = np.vstack(tgt_sizes)
|
| 402 |
|
| 403 |
return MiniCPMVBatchFeature(
|
| 404 |
-
data={"pixel_values": new_images, "image_sizes": image_sizes, "tgt_sizes": tgt_sizes}, tensor_type=return_tensors
|
| 405 |
)
|
| 406 |
|
| 407 |
AutoImageProcessor.register("MiniCPMVImageProcessor", MiniCPMVImageProcessor)
|
|
|
|
| 401 |
tgt_sizes = np.vstack(tgt_sizes)
|
| 402 |
|
| 403 |
return MiniCPMVBatchFeature(
|
| 404 |
+
data={"pixel_values": [new_images], "image_sizes": [image_sizes], "tgt_sizes": [tgt_sizes]}, tensor_type=return_tensors
|
| 405 |
)
|
| 406 |
|
| 407 |
AutoImageProcessor.register("MiniCPMVImageProcessor", MiniCPMVImageProcessor)
|
processing_minicpmv.py
CHANGED
|
@@ -125,18 +125,18 @@ class MiniCPMVProcessor(ProcessorMixin):
|
|
| 125 |
images, image_sizes = images["pixel_values"], images["image_sizes"]
|
| 126 |
|
| 127 |
image_tags = re.findall(pattern, texts)
|
| 128 |
-
assert len(image_tags) == len(image_sizes)
|
| 129 |
text_chunks = texts.split(pattern)
|
| 130 |
final_texts = ""
|
| 131 |
for i in range(len(image_tags)):
|
| 132 |
-
final_texts = final_texts + text_chunks[i] + self.image_processor.get_slice_image_placeholder(image_sizes[i])
|
| 133 |
final_texts += text_chunks[-1]
|
| 134 |
input_ids, image_bounds = self._convert(final_texts, max_length)
|
| 135 |
|
| 136 |
return MiniCPMVBatchFeature(data={
|
| 137 |
"input_ids": input_ids,
|
| 138 |
-
"pixel_values":
|
| 139 |
-
"image_sizes":
|
| 140 |
"image_bounds": [image_bounds]
|
| 141 |
}, tensor_type=return_tensors)
|
| 142 |
|
|
|
|
| 125 |
images, image_sizes = images["pixel_values"], images["image_sizes"]
|
| 126 |
|
| 127 |
image_tags = re.findall(pattern, texts)
|
| 128 |
+
assert len(image_tags) == len(image_sizes[0])
|
| 129 |
text_chunks = texts.split(pattern)
|
| 130 |
final_texts = ""
|
| 131 |
for i in range(len(image_tags)):
|
| 132 |
+
final_texts = final_texts + text_chunks[i] + self.image_processor.get_slice_image_placeholder(image_sizes[0][i])
|
| 133 |
final_texts += text_chunks[-1]
|
| 134 |
input_ids, image_bounds = self._convert(final_texts, max_length)
|
| 135 |
|
| 136 |
return MiniCPMVBatchFeature(data={
|
| 137 |
"input_ids": input_ids,
|
| 138 |
+
"pixel_values": images,
|
| 139 |
+
"image_sizes": image_sizes,
|
| 140 |
"image_bounds": [image_bounds]
|
| 141 |
}, tensor_type=return_tensors)
|
| 142 |
|