Mohammed422 committed
Commit bc5a7a0 · 1 Parent(s): 714bf44
Files changed (1)
  1. app.py +0 -247
app.py CHANGED
@@ -1,250 +1,3 @@
- # import gradio as gr
- # import spaces
- # from gradio.themes.base import Base
- # from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
- # from qwen_vl_utils import process_vision_info
- # from PIL import Image
- # from datetime import datetime
- # import os
- # import json
- # import fitz  # PyMuPDF
-
- # # Define a custom theme inheriting from the soft theme
- # class CustomTheme(Base):
- #     def __init__(self):
- #         super().__init__()
- #         self.primary_hue = "blue"
- #         self.secondary_hue = "sky"
-
- # custom_theme = CustomTheme()
-
- # DESCRIPTION = "A powerful vision-language model that can understand images and text to provide detailed analysis."
-
- # def array_to_image_path(image_filepath, max_width=1250, max_height=1750):
- #     if image_filepath is None:
- #         raise ValueError("No image provided.")
-
- #     img = Image.open(image_filepath)
- #     width, height = img.size
- #     if width > max_width or height > max_height:
- #         img.thumbnail((max_width, max_height))
-
- #     return os.path.abspath(image_filepath), img.width, img.height
-
- # def convert_pdf_to_images(pdf_path):
- #     """Opens a PDF and converts each page into a high-resolution PNG image."""
- #     image_paths = []
- #     doc = fitz.open(pdf_path)
- #     base_name = os.path.splitext(os.path.basename(pdf_path))[0]
-
- #     for i, page in enumerate(doc):
- #         pix = page.get_pixmap(dpi=200)
- #         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
- #         image_path = f"{base_name}_page_{i+1}_{timestamp}.png"
- #         pix.save(image_path)
- #         image_paths.append(image_path)
-
- #     doc.close()
- #     return image_paths
-
- # # Initialize the model and processor
- # model = Qwen2VLForConditionalGeneration.from_pretrained(
- #     "Qwen/Qwen2-VL-7B-Instruct",
- #     torch_dtype="auto",
- #     device_map="auto"
- # )
- # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
-
- # @spaces.GPU
- # def run_inference(uploaded_files, text_input):
- #     results = []
- #     temp_files_to_clean = []
-
- #     json_prompt = (
- #         f"{text_input}\n\nBased on the image and the query, respond ONLY with a single, "
- #         "valid JSON object. This object should be well-structured, using nested objects "
- #         "and arrays to logically represent the information."
- #     )
-
- #     if not uploaded_files:
- #         error_json = json.dumps({"error": "No file provided. Please upload an image or PDF."}, indent=4)
- #         return error_json, gr.Button(interactive=False)
-
- #     image_paths_to_process = []
- #     unsupported_files = []
- #     for file_obj in uploaded_files:
- #         file_path = file_obj.name
- #         temp_files_to_clean.append(file_path)
-
- #         if file_path.lower().endswith('.pdf'):
- #             pdf_page_images = convert_pdf_to_images(file_path)
- #             image_paths_to_process.extend(pdf_page_images)
- #             temp_files_to_clean.extend(pdf_page_images)
- #         elif file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.webp')):
- #             image_paths_to_process.append(file_path)
- #         else:
- #             unsupported_files.append(os.path.basename(file_path))
-
- #     if unsupported_files:
- #         unsupported_str = ", ".join(unsupported_files)
- #         results.append(json.dumps({
- #             "error": f"Unsupported file type(s) were ignored: {unsupported_str}",
- #             "details": "Please upload only images (PNG, JPG, etc.) or PDF files."
- #         }, indent=4))
-
- #     for image_file in image_paths_to_process:
- #         try:
- #             image_path, width, height = array_to_image_path(image_file)
-
- #             messages = [
- #                 {"role": "user", "content": [
- #                     {"type": "image", "image": image_path, "resized_height": height, "resized_width": width},
- #                     {"type": "text", "text": json_prompt}
- #                 ]}
- #             ]
- #             text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
- #             image_inputs, video_inputs = process_vision_info(messages)
- #             inputs = processor(text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt").to("cuda")
-
- #             generated_ids = model.generate(**inputs, max_new_tokens=4096)
- #             generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
- #             raw_output = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True)
- #             raw_text = raw_output[0]
-
- #             try:
- #                 start_index = raw_text.find('{')
- #                 end_index = raw_text.rfind('}') + 1
- #                 if start_index != -1 and end_index != 0:
- #                     json_string = raw_text[start_index:end_index]
- #                     parsed_json = json.loads(json_string)
- #                     parsed_json['source_page'] = os.path.basename(image_path)
- #                     formatted_json = json.dumps(parsed_json, indent=4)
- #                     results.append(formatted_json)
- #                 else:
- #                     results.append(f'{{"error": "Model did not return valid JSON.", "source_page": "{os.path.basename(image_path)}", "raw_response": "{raw_text}"}}')
- #             except json.JSONDecodeError:
- #                 results.append(f'{{"error": "Failed to decode JSON.", "source_page": "{os.path.basename(image_path)}", "raw_response": "{raw_text}"}}')
- #         except Exception as e:
- #             results.append(f'{{"error": "An unexpected error occurred during processing.", "details": "{str(e)}"}}')
-
- #     for f in temp_files_to_clean:
- #         if os.path.exists(f):
- #             try:
- #                 os.remove(f)
- #             except OSError as e:
- #                 print(f"Error deleting file {f}: {e}")
-
- #     final_json = "\n---\n".join(results)
- #     is_error = '"error":' in final_json
- #     return final_json, gr.Button(interactive=not is_error)
-
-
- # @spaces.GPU
- # def generate_explanation(json_text):
- #     if not json_text or '"error":' in json_text:
- #         return "Cannot generate an explanation. Please produce a valid JSON output first. 🙁"
-
- #     explanation_prompt = (
- #         "You are an expert data analyst. Your task is to provide a comprehensive, human-readable explanation "
- #         "of the following JSON data, which may represent one or more pages from a document. First, provide a textual explanation. "
- #         "If the JSON contains data from multiple sources (pages), explain each one. Then, if the JSON data represents a table, "
- #         "a list of items, or a receipt, you **must** re-format the key information into a Markdown table for clarity.\n\n"
- #         f"JSON Data:\n```json\n{json_text}\n```"
- #     )
-
- #     messages = [{"role": "user", "content": explanation_prompt}]
- #     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
- #     inputs = processor(text=[text], return_tensors="pt").to("cuda")
-
- #     generated_ids = model.generate(**inputs, max_new_tokens=2048)
- #     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
- #     explanation_output = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True)[0]
-
- #     return explanation_output
-
- # # --- FINAL AND MOST ROBUST CSS FIX ---
- # css = """
- # .gradio-container { font-family: 'IBM Plex Sans', sans-serif; }
-
- # /* --- Light Mode Styles --- */
- # #output-code, #output-code pre, #output-code code {
- #     background-color: #f0f0f0;
- #     border: 1px solid #e0e0e0;
- #     border-radius: 7px;
- #     color: #333;
- # }
- # #output-code .token.punctuation { color: #393a34; }
- # #output-code .token.property, #output-code .token.string { color: #0b7500; }
- # #output-code .token.number { color: #2973b7; }
- # #output-code .token.boolean { color: #9a050f; }
-
- # #explanation-box {
- #     min-height: 200px;
- #     border: 1px solid #e0e0e0;
- #     padding: 15px;
- #     border-radius: 7px;
- # }
-
- # /* --- Dark Mode Overrides targeting Gradio's .dark class --- */
- # .dark #output-code, .dark #output-code pre, .dark #output-code code {
- #     background-color: #2b2b2b !important;
- #     border: 1px solid #444 !important;
- #     color: #f0f0f0 !important;
- # }
- # .dark #explanation-box {
- #     border: 1px solid #444 !important;
- # }
- # /* This is a catch-all to ensure all parts of the syntax start light-colored */
- # .dark #output-code code span {
- #     color: #f0f0f0 !important;
- # }
- # /* Then, we apply specific colors for syntax highlighting on top */
- # .dark #output-code .token.punctuation { color: #ccc !important; }
- # .dark #output-code .token.property, .dark #output-code .token.string { color: #90ee90 !important; }
- # .dark #output-code .token.number { color: #add8e6 !important; }
- # .dark #output-code .token.boolean { color: #f08080 !important; }
- # """
-
- # with gr.Blocks(theme=custom_theme, css=css) as demo:
- #     gr.Markdown("# Sparrow Qwen2-VL-7B Vision AI 👁️")
- #     gr.Markdown(DESCRIPTION)
-
- #     with gr.Row():
- #         with gr.Column(scale=1):
- #             input_files = gr.Files(label="Upload Images or PDFs")
- #             text_input = gr.Textbox(
- #                 label="Your Query",
- #                 placeholder="e.g., Extract the total amount from this receipt."
- #             )
- #             submit_btn = gr.Button("Analyze File(s)", variant="primary")
-
- #         with gr.Column(scale=2):
- #             output_text = gr.Code(
- #                 label="Full JSON Response",
- #                 language="json",
- #                 elem_id="output-code",
- #                 interactive=False  # This makes the output field read-only
- #             )
- #             explanation_btn = gr.Button("📄 Generate Detailed Explanation", interactive=False)
- #             explanation_output = gr.Markdown(label="Detailed Explanation", elem_id="explanation-box")
-
- #     submit_btn.click(
- #         fn=run_inference,
- #         inputs=[input_files, text_input],
- #         outputs=[output_text, explanation_btn]
- #     )
-
- #     explanation_btn.click(
- #         fn=generate_explanation,
- #         inputs=[output_text],
- #         outputs=[explanation_output],
- #         show_progress='full'
- #     )
-
- # demo.queue()
- # demo.launch(debug=True)
-
-
  import gradio as gr
  import spaces
  from gradio.themes.base import Base