Mithun162001 committed on
Commit
0b935c0
·
1 Parent(s): 03cd034

Upload EDAapp.py

Browse files
Files changed (1) hide show
  1. EDAapp.py +75 -0
EDAapp.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# importing the libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
def _is_numeric_column(series: pd.Series) -> bool:
    """Return True when *series* holds numeric (non-boolean) data."""
    dtype = series.dtype
    # Booleans count as numeric for pandas, but a mean-filled boolean column
    # is meaningless, so they are treated as categorical here.
    return pd.api.types.is_numeric_dtype(dtype) and not pd.api.types.is_bool_dtype(dtype)


def split_columns_by_dtype(df: pd.DataFrame) -> tuple:
    """Partition the column names of *df* by the kind of data they hold.

    Args:
        df: The data frame to inspect.

    Returns:
        A ``(categorical, numeric)`` tuple of two lists of column names, each
        in the original column order. Every non-numeric column (object,
        string, category, bool, ...) lands in ``categorical``.
    """
    categorical = []
    numeric = []
    for column in df.columns:
        target = numeric if _is_numeric_column(df[column]) else categorical
        target.append(column)
    return categorical, numeric


def fill_missing_values(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with missing values imputed column by column.

    Numeric columns are filled with their mean; all other columns are filled
    with their most frequent value. A column that contains only missing
    values has nothing to impute from and is left unchanged.

    Args:
        df: The data frame to clean. It is not modified.

    Returns:
        A new data frame with the imputed values.
    """
    filled = df.copy()
    for column in filled.columns:
        series = filled[column]
        if not series.isna().any():
            continue
        if _is_numeric_column(series):
            replacement = series.mean()
        else:
            modes = series.mode()
            if modes.empty:
                # All values are null: there is no mode to fill with.
                continue
            replacement = modes.iloc[0]
        # Assign back instead of fillna(inplace=True) on a column selection,
        # which does not reliably write through to the frame.
        filled[column] = series.fillna(replacement)
    return filled


def count_nulls(df: pd.DataFrame) -> pd.Series:
    """Return the number of missing values per column, indexed by column name."""
    return df.isna().sum()


def main() -> None:
    """Render the EDA app: upload a CSV, inspect nulls, and plot columns."""
    # Title and Markdown
    st.title("AN EXAMPLE EDA APP")
    st.markdown(''' <h3>This is an example of how to do EDA in streamlit app</h3>''', unsafe_allow_html=True)

    # File upload
    file_up = st.file_uploader("Upload a file", type='csv')

    # Nothing to analyse until a file has been uploaded.
    if file_up is None:
        return

    # Report success only once the upload has actually been parsed.
    try:
        df = pd.read_csv(file_up)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        return
    st.success("File uploaded successfully")

    categorical_cols, numeric_cols = split_columns_by_dtype(df)

    # Replace null values with the column mean (numeric) or mode (other).
    with st.form(key='my_form'):
        with st.sidebar:
            st.sidebar.header("To remove NULL values press below button")
            submit_button = st.form_submit_button(label="Remove NULL")

    # NOTE(review): the CSV is re-read on every script run, so the imputation
    # below only applies to the run triggered by the button press - confirm
    # whether the cleaned frame should persist (e.g. via st.session_state).
    if submit_button:
        df = fill_missing_values(df)

    # Number of null values in each column, and across the whole frame.
    null_counts = count_nulls(df)
    total_nulls = int(null_counts.sum())

    # With no nulls just report the total; otherwise plot the count per column.
    if total_nulls == 0:
        st.write("Total no. of NULL values: ", str(total_nulls))
    else:
        st.write("Bar plot to know the number of NULL values in each column")
        st.write("Total number of null values: ", str(total_nulls))
        fig = px.bar(
            x=list(df.columns),
            y=null_counts.tolist(),
            labels={'x': "Column Names", 'y': "No. of Null values"},
        )
        st.plotly_chart(fig)

    # Frequency Plot
    st.sidebar.header("Select variable")
    selected = st.sidebar.selectbox('Object variables', categorical_cols)
    st.write("Bar Plot to know the frequency of each category")
    if selected is None:
        # The selectbox yields None when there are no categorical columns.
        st.write("No categorical columns available")
    else:
        frequency = df[selected].value_counts()
        # Pass explicit arrays: the name of the value_counts() result differs
        # between pandas versions, so it cannot be used as a column key.
        fig2 = px.bar(
            x=frequency.index.tolist(),
            y=frequency.tolist(),
            labels={'x': selected, 'y': 'count'},
        )
        st.plotly_chart(fig2)

    # Correlation chart
    st.sidebar.header("Select variable")
    selected2 = st.sidebar.multiselect("Variables", numeric_cols)
    st.write("Scatter plot for correlation")
    if len(selected2) == 2:
        fig3 = px.scatter(df, x=selected2[0], y=selected2[1])
        st.plotly_chart(fig3)
    else:
        st.write("Select any 2 variables only")


if __name__ == "__main__":
    main()