Spaces:

muhammadshaheryar
/

RAG-JAN-2025

Build error

App Files Files Community

muhammadshaheryar committed on Jan 30

Commit

66d3cf4

verified ·

1 Parent(s): 624c6eb

Create app.py

Browse files

Files changed (1) hide show

app.py +77 -0

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import streamlit as st
+import pandas as pd
+from io import StringIO
+import PyPDF2
+from docx import Document
# Load an uploaded CSV file into a pandas DataFrame
def read_csv(file):
    """Parse ``file`` as CSV using pandas' default settings.

    Args:
        file: A path or file-like object that ``pd.read_csv`` accepts.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(file)
# Load an uploaded Excel workbook into a pandas DataFrame
def read_excel(file):
    """Parse ``file`` as an Excel workbook using pandas' default settings.

    Args:
        file: A path or file-like object that ``pd.read_excel`` accepts.

    Returns:
        The DataFrame produced by ``pd.read_excel``.
    """
    return pd.read_excel(file)
# Function to extract text from a TXT file
def read_txt(file):
    """Read a binary file-like object and return its contents as text.

    Args:
        file: An object whose ``read()`` returns ``bytes``.

    Returns:
        The decoded text. A leading UTF-8 byte-order mark is dropped, and
        bytes that are not valid UTF-8 appear as U+FFFD instead of raising
        ``UnicodeDecodeError``.
    """
    raw = file.read()
    # "utf-8-sig" decodes plain UTF-8 identically but strips a leading BOM,
    # which would otherwise show up as a stray "\ufeff" in the output.
    # errors="replace" keeps a non-UTF-8 upload from crashing the app.
    return raw.decode("utf-8-sig", errors="replace")
# Collect the paragraph text of an uploaded DOCX file
def read_docx(file):
    """Return the text of every paragraph in a DOCX document.

    Args:
        file: A path or file-like object passed straight to ``Document``.

    Returns:
        The ``text`` of each entry in ``doc.paragraphs``, joined with
        newline characters in document order.
    """
    paragraphs = Document(file).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)
# Function to extract text from a PDF file
def read_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: A path or binary file-like object passed to ``PyPDF2.PdfReader``.

    Returns:
        The extracted text of all pages, joined in page order with no
        separator between pages.
    """
    # PdfFileReader / numPages / getPage are the legacy PyPDF2 names and were
    # removed in PyPDF2 3.0; PdfReader and its ``pages`` sequence replace them.
    reader = PyPDF2.PdfReader(file)
    # Defensive: treat a falsy extraction result (e.g. a page with no text
    # layer) as empty text so the join cannot fail on a non-string.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Streamlit app
def main():
    """Render the upload page and display the contents of the uploaded file.

    Shows a title and a file uploader restricted to CSV, XLSX, TXT, DOCX and
    PDF. Once a file is uploaded, the matching ``read_*`` function is called
    and its result is written to the page under a heading. An unrecognised
    extension produces an error message instead.
    """
    st.title("File Upload and Data Extraction App")
    st.write("Upload a file (CSV, Excel, TXT, DOCX, or PDF) to extract data.")
    # File uploader
    uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx", "txt", "docx", "pdf"])
    if uploaded_file is None:
        return

    # Dispatch on the file extension rather than ``uploaded_file.type``: the
    # MIME type is reported by the browser and is not reliable (CSV files are
    # commonly reported as "application/vnd.ms-excel", and unknown types as
    # "application/octet-stream"), which made valid uploads hit the error path.
    handlers = {
        "csv": ("### CSV Data", read_csv),
        "xlsx": ("### Excel Data", read_excel),
        "txt": ("### TXT Data", read_txt),
        "docx": ("### DOCX Data", read_docx),
        "pdf": ("### PDF Data", read_pdf),
    }
    # Text after the last dot, lower-cased so "REPORT.PDF" matches "pdf".
    extension = uploaded_file.name.rpartition(".")[2].lower()
    handler = handlers.get(extension)
    if handler is None:
        st.error("Unsupported file type. Please upload a CSV, Excel, TXT, DOCX, or PDF file.")
        return

    heading, reader = handler
    # Parse before writing the heading so a parse failure does not leave an
    # orphaned heading on the page.
    content = reader(uploaded_file)
    st.write(heading)
    st.write(content)


# Run the Streamlit app
if __name__ == "__main__":
    main()