Spaces:
Running
Running
Update quiz.py
Browse files
quiz.py
CHANGED
|
@@ -106,16 +106,20 @@ class DocumentProcessor:
|
|
| 106 |
return self._clean_text(source)
|
| 107 |
|
| 108 |
def _load_pdf(self, path: str) -> str:
|
| 109 |
-
doc
|
| 110 |
pages = []
|
|
|
|
| 111 |
for page_num, page in enumerate(doc):
|
| 112 |
-
blocks = page.get_text("blocks")
|
| 113 |
text = " ".join(block[4] for block in blocks if block[4].strip())
|
|
|
|
| 114 |
text = self._clean_text(text)
|
|
|
|
| 115 |
if text.strip():
|
| 116 |
pages.append(f"[Page {page_num + 1}]\n{text}")
|
|
|
|
| 117 |
full_text = "\n\n".join(pages)
|
| 118 |
-
print(f"
|
| 119 |
return full_text
|
| 120 |
|
| 121 |
def _clean_text(self, text: str) -> str:
|
|
|
|
| 106 |
return self._clean_text(source)
|
| 107 |
|
| 108 |
def _load_pdf(self, path: str) -> str:
    """Extract the text of a PDF, one labelled section per non-empty page.

    Each page's text blocks are joined with single spaces, passed through
    ``self._clean_text``, and prefixed with a ``[Page N]`` marker (1-based).
    Pages whose cleaned text is empty are omitted. A one-line summary is
    printed to stdout.

    Args:
        path: Location of the PDF, passed straight to ``fitz.open``.

    Returns:
        The page sections joined by blank lines; an empty string when no
        page yields any text.
    """
    doc = fitz.open(path)
    try:
        # Captured while the document is open: the summary below is printed
        # after the handle has been released.
        page_count = len(doc)
        pages = []
        for page_num, page in enumerate(doc, start=1):
            blocks = page.get_text("blocks")
            # Index 4 of each block tuple is used as that block's text;
            # whitespace-only blocks are dropped before joining.
            text = " ".join(block[4] for block in blocks if block[4].strip())
            text = self._clean_text(text)
            if text.strip():
                pages.append(f"[Page {page_num}]\n{text}")
    finally:
        # Release the underlying file handle even if extraction raises.
        doc.close()

    full_text = "\n\n".join(pages)
    print(f" Loaded PDF: {page_count} pages, {len(full_text):,} chars")
    return full_text
|
| 124 |
|
| 125 |
def _clean_text(self, text: str) -> str:
|