Spaces:
Running
Running
File size: 594 Bytes
a53dc0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# detector/utils.py
from PyPDF2 import PdfReader
import docx
async def extract_text_from_file(file):
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    The format is chosen from the (case-insensitive) filename extension.

    Args:
        file: Upload object exposing ``filename``, a binary file-like
            ``file`` attribute, and an awaitable ``read()`` -- presumably a
            FastAPI/Starlette ``UploadFile``; confirm against the caller.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; TXT content is decoded as UTF-8.

    Raises:
        ValueError: If the filename is missing or its extension is not
            ``.pdf``, ``.docx``, or ``.txt``.
        UnicodeDecodeError: If a ``.txt`` file is not valid UTF-8.
    """
    # A missing filename is treated like any other unsupported upload
    # instead of crashing with AttributeError on ``None.lower()``.
    filename = (file.filename or "").lower()

    if filename.endswith(".pdf"):
        reader = PdfReader(file.file)
        # extract_text() may yield nothing for a page (e.g. image-only
        # pages); fall back to "" so join() never sees a non-string.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if filename.endswith(".docx"):
        document = docx.Document(file.file)
        return "\n".join(para.text for para in document.paragraphs)

    if filename.endswith(".txt"):
        raw = await file.read()
        # "utf-8-sig" decodes plain UTF-8 identically but also strips a
        # leading byte-order mark, which would otherwise leak into the
        # text as "\ufeff".
        return raw.decode("utf-8-sig")

    raise ValueError("Unsupported file type.")
|