Spaces:
Running
Running
# detector/utils.py
from PyPDF2 import PdfReader | |
import docx | |
async def extract_text_from_file(file):
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    The format is chosen from the file name's extension, compared
    case-insensitively.

    Args:
        file: Upload object exposing ``filename``, a binary file-like
            ``file`` attribute, and an awaitable ``read()`` that returns
            bytes. (Looks like a Starlette/FastAPI ``UploadFile`` --
            confirm against the caller.)

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; TXT content is decoded as UTF-8.

    Raises:
        ValueError: If the file name is missing or does not end in
            ``.pdf``, ``.docx`` or ``.txt``.
        UnicodeDecodeError: If a ``.txt`` upload is not valid UTF-8
            (a subclass of ``ValueError``).
    """
    # A missing name (None) would otherwise fail with AttributeError on
    # .lower(); treat it like any other unsupported upload.
    filename = (file.filename or "").lower()

    # NOTE: the PDF and DOCX branches parse synchronously inside this
    # coroutine; only the TXT branch awaits.
    if filename.endswith(".pdf"):
        reader = PdfReader(file.file)
        # `or ""` keeps the join safe when a page yields no text.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if filename.endswith(".docx"):
        document = docx.Document(file.file)
        return "\n".join(para.text for para in document.paragraphs)

    if filename.endswith(".txt"):
        raw = await file.read()
        return raw.decode("utf-8")

    raise ValueError("Unsupported file type.")