pratyushjdhv committed on
Commit
7d7afb1
·
verified ·
1 Parent(s): bbfa9b8

Update quiz.py

Browse files
Files changed (1) hide show
  1. quiz.py +7 -3
quiz.py CHANGED
@@ -106,16 +106,20 @@ class DocumentProcessor:
106
  return self._clean_text(source)
107
 
108
  def _load_pdf(self, path: str) -> str:
109
- doc = fitz.open(path)
110
  pages = []
 
111
  for page_num, page in enumerate(doc):
112
- blocks = page.get_text("blocks")\
113
  text = " ".join(block[4] for block in blocks if block[4].strip())
 
114
  text = self._clean_text(text)
 
115
  if text.strip():
116
  pages.append(f"[Page {page_num + 1}]\n{text}")
 
117
  full_text = "\n\n".join(pages)
118
- print(f" Loaded PDF: {len(doc)} pages, {len(full_text):,} chars")
119
  return full_text
120
 
121
  def _clean_text(self, text: str) -> str:
 
106
  return self._clean_text(source)
107
 
108
  def _load_pdf(self, path: str) -> str:
109
+ doc = fitz.open(path)
110
  pages = []
111
+
112
  for page_num, page in enumerate(doc):
113
+ blocks = page.get_text("blocks")
114
  text = " ".join(block[4] for block in blocks if block[4].strip())
115
+
116
  text = self._clean_text(text)
117
+
118
  if text.strip():
119
  pages.append(f"[Page {page_num + 1}]\n{text}")
120
+
121
  full_text = "\n\n".join(pages)
122
+ print(f" Loaded PDF: {len(doc)} pages, {len(full_text):,} chars")
123
  return full_text
124
 
125
  def _clean_text(self, text: str) -> str: