Vishal1122 committed on
Commit
bff705e
·
verified ·
1 Parent(s): 6d2a09a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -37
app.py CHANGED
@@ -8,7 +8,9 @@ from typing import Dict
8
  import gradio as gr
9
  import ollama
10
  from PIL import Image
 
11
 
 
12
 
13
  def save_temp_image(image: Image.Image) -> str:
14
  """
@@ -27,45 +29,87 @@ def save_temp_image(image: Image.Image) -> str:
27
  return tmp_file.name
28
 
29
def id_extractor(image: Image.Image) -> Dict:
    """
    Extract key ID-card details from the provided image via the ollama chat model.

    Args:
        image (Image.Image): The image from which to extract details.

    Returns:
        Dict: The details parsed from the model's JSON reply (e.g. 'name',
            'date of birth', 'ID number', 'Issuer').
        If *image* is None, returns ``({}, gr.update(visible=False))`` so the
        Gradio output component is hidden.
        If the model reply is not valid JSON, returns a user-facing error
        string asking for a clearer image.
    """
    if image is None:
        # No input: empty result and hide the output component.
        return {}, gr.update(visible=False)

    # Persist the image to disk so ollama can read it by path.
    image_path = save_temp_image(image)
    try:
        response = ollama.chat(
            model='qwen2.5vl:7b',
            messages=[{
                'role': 'user',
                'content': "Extract key details like 'name', 'date of birth', 'ID number', 'Issuer' from the image as JSON, excluding signatures.",
                'images': [image_path]
            }]
        )

        # The model tends to wrap its answer in a ```json ... ``` fence; strip it.
        resp = response.message.content.replace("```json", "").replace("```", "").strip()
        return json.loads(resp)

    except json.JSONDecodeError:
        # Log the full traceback for debugging, then ask the user to retry.
        print(traceback.format_exc())
        return "Kindly upload an image with good clarity"
    finally:
        # Fix: the temp file from save_temp_image was previously never removed (leak).
        import os
        try:
            os.remove(image_path)
        except OSError:
            pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  # Define the Gradio interface for the ID extractor
71
  id_interface = gr.Interface(
 
8
  import gradio as gr
9
  import ollama
10
  from PIL import Image
11
+ from qwen_vl_utils import process_vision_info
12
 
13
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
14
 
15
  def save_temp_image(image: Image.Image) -> str:
16
  """
 
29
  return tmp_file.name
30
 
31
# Cache so the multi-GB model and processor are loaded once per process,
# not on every request (the previous version reloaded them on every call).
_QWEN_VL_CACHE = {}


def _load_qwen_vl():
    """Load and cache the Qwen2.5-VL model and processor (first call only)."""
    if not _QWEN_VL_CACHE:
        # default: load the model on the available device(s)
        _QWEN_VL_CACHE["model"] = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            "Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
        )
        # default processor
        _QWEN_VL_CACHE["processor"] = AutoProcessor.from_pretrained(
            "Qwen/Qwen2.5-VL-7B-Instruct"
        )
    return _QWEN_VL_CACHE["model"], _QWEN_VL_CACHE["processor"]


def id_extractor(image: Image.Image) -> Dict:
    """
    Extract key ID-card details from *image* with the Qwen2.5-VL model.

    Args:
        image (Image.Image): The ID image to analyse.

    Returns:
        The decoded model output. NOTE(review): despite the ``Dict``
        annotation, ``processor.batch_decode`` returns a list containing one
        string (the model's JSON-formatted answer); kept as-is so the
        existing Gradio wiring is unchanged — confirm against the interface.
    """
    if image is None:
        # Guard: the previous version crashed on an empty upload.
        return {}

    model, processor = _load_qwen_vl()

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": image,
                },
                {"type": "text", "text": "Extract key details like 'name', 'date of birth', 'ID number', 'Issuer' from the image as JSON, excluding signatures. Note if a ID has two names, pick the first one."},
            ],
        }
    ]

    # Preparation for inference
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    # Fix: move the inputs to the model's device; with device_map="auto" the
    # model may sit on GPU while the processor returns CPU tensors.
    inputs = inputs.to(model.device)

    # Inference: generation of the output
    generated_ids = model.generate(**inputs, max_new_tokens=128)
    # Drop the prompt tokens so only the newly generated text is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return output_text
112
+
113
 
114
  # Define the Gradio interface for the ID extractor
115
  id_interface = gr.Interface(