muhammadshaheryar committed on
Commit
66d3cf4
·
verified ·
1 Parent(s): 624c6eb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from io import StringIO
4
+ import PyPDF2
5
+ from docx import Document
6
+
7
+ # Function to extract data from a CSV file
8
def read_csv(file):
    """Load CSV content into a pandas DataFrame.

    Args:
        file: Anything ``pandas.read_csv`` accepts; the app passes the
            uploaded file object.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(file)
11
+
12
+ # Function to extract data from an Excel file
13
def read_excel(file):
    """Load an Excel workbook into a pandas DataFrame.

    Args:
        file: Anything ``pandas.read_excel`` accepts; the app passes the
            uploaded file object.

    Returns:
        The DataFrame produced by ``pandas.read_excel`` with default options.
    """
    return pd.read_excel(file)
16
+
17
+ # Function to extract text from a TXT file
18
def read_txt(file):
    """Return the decoded text of a plain-text upload.

    Args:
        file: Binary file-like object whose ``read()`` returns ``bytes``.

    Returns:
        The file content as ``str``. A leading UTF-8 byte-order mark is
        dropped, and bytes that are not valid UTF-8 are replaced with
        U+FFFD instead of raising ``UnicodeDecodeError``.
    """
    raw = file.read()
    # "utf-8-sig" decodes ordinary UTF-8 and additionally strips a leading
    # BOM, which would otherwise show up as a stray character in the output.
    # errors="replace" keeps a file saved in another encoding from crashing
    # the app; the text is still shown, with undecodable bytes marked.
    return raw.decode("utf-8-sig", errors="replace")
21
+
22
+ # Function to extract text from a DOCX file
23
def read_docx(file):
    """Return the paragraph text of a DOCX document.

    Args:
        file: Value handed to ``docx.Document``; the app passes the
            uploaded file object.

    Returns:
        The ``text`` of every entry in the document's ``paragraphs``,
        joined with newline characters.
    """
    paragraphs = Document(file).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)
27
+
28
+ # Function to extract text from a PDF file
29
def read_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Value handed to ``PyPDF2.PdfReader``; the app passes the
            uploaded file object.

    Returns:
        The extracted text of all pages as one ``str``; pages that yield
        no text contribute an empty string.
    """
    # PdfReader / .pages replace PdfFileReader / numPages / getPage, which
    # were deprecated in PyPDF2 2.x and raise an error in PyPDF2 3.x. The
    # original already called extract_text(), which belongs to the same
    # newer API, so this does not raise the minimum required version.
    reader = PyPDF2.PdfReader(file)
    # "or ''" guards against a page whose extraction returns nothing
    # (for example an image-only page), which would break the join.
    return "".join(page.extract_text() or "" for page in reader.pages)
36
+
37
+ # Streamlit app
38
def main():
    """Render the upload page and display the content of the uploaded file.

    Shows a title and an instruction line, then a file uploader limited to
    csv, xlsx, txt, docx and pdf. When a file is present it is parsed by the
    reader matching its extension and the result is written to the page
    under a heading; an unrecognised extension produces an error message.
    """
    st.title("File Upload and Data Extraction App")
    st.write("Upload a file (CSV, Excel, TXT, DOCX, or PDF) to extract data.")

    # File uploader
    uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx", "txt", "docx", "pdf"])
    if uploaded_file is None:
        return

    # Dispatch on the file extension rather than on uploaded_file.type: the
    # MIME type is reported by the browser and is not stable across systems
    # (a .csv can arrive as "application/vnd.ms-excel", for instance), which
    # sent valid uploads to the "unsupported" branch. The extension is the
    # same criterion the uploader's own type= filter applies.
    handlers = {
        "csv": ("### CSV Data", read_csv),
        "xlsx": ("### Excel Data", read_excel),
        "txt": ("### TXT Data", read_txt),
        "docx": ("### DOCX Data", read_docx),
        "pdf": ("### PDF Data", read_pdf),
    }
    extension = uploaded_file.name.rpartition(".")[2].lower()
    handler = handlers.get(extension)
    if handler is None:
        st.error("Unsupported file type. Please upload a CSV, Excel, TXT, DOCX, or PDF file.")
        return

    heading, reader = handler
    # Parse before writing the heading so a failed read leaves no orphan title.
    content = reader(uploaded_file)
    st.write(heading)
    st.write(content)
74
+
75
+ # Run the Streamlit app
76
if __name__ == "__main__":
    # Build the page only when this file is executed as the main script,
    # not when it is imported as a module.
    main()