JEPHONETORRE committed on
Commit
1b3143f
·
1 Parent(s): 0fa3c25
Files changed (3) hide show
  1. app.py +68 -0
  2. lda_model.pkl +3 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that trains an LDA topic model on a small built-in corpus.

The user picks the number of topics, trains the model with a button click,
sees the top terms of every topic, and can pickle the fitted model to disk.
"""

import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import pickle

# Number of highest-weighted terms listed for each topic.
TOP_TERMS_PER_TOPIC = 5
# File the fitted model is pickled to when "Save Model" is clicked.
MODEL_PATH = "lda_model.pkl"

# Title
st.title("Unsupervised Text Analysis App with Training")
st.subheader("Train an LDA Model for Topic Modeling")

# Initialize Session State
# Streamlit re-executes the script on every widget interaction, so anything
# that has to outlive a single button click is kept in session state.
if "lda_model" not in st.session_state:
    st.session_state.lda_model = None
if "lda_topics" not in st.session_state:
    st.session_state.lda_topics = []

# Built-in Dataset
st.write("### Dataset:")
texts = [
    "The economy is experiencing significant growth this year.",
    "Climate change is one of the most pressing global challenges.",
    "Artificial intelligence is transforming industries worldwide.",
    "Renewable energy sources are becoming more popular and cost-effective.",
    "Sports events bring people together and promote cultural exchange.",
    "Advances in medicine have greatly improved life expectancy.",
    "Education plays a critical role in shaping the future of societies.",
    "Travel and tourism contribute significantly to the global economy.",
    "Space exploration inspires innovation and collaboration.",
    "Social media platforms influence public opinion and behavior.",
]

# Display dataset
st.write(texts)

# Input: Number of Topics
st.subheader("Training Parameters")
num_topics = st.slider("Select the number of topics for training", 2, 10, 3)

# Vectorization: bag-of-words counts with English stop words removed.
vectorizer = CountVectorizer(stop_words="english", max_features=1000)
doc_term_matrix = vectorizer.fit_transform(texts)

# Train LDA Model
st.subheader("Training the LDA Model")
if st.button("Train Model"):
    with st.spinner("Training the LDA model..."):
        # Fixed random_state keeps the topics reproducible between runs.
        lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
        lda.fit(doc_term_matrix)
        st.session_state.lda_model = lda  # Save the trained model in session state

    # Summarise each topic by its highest-weighted vocabulary terms and keep
    # the summaries in session state, so they remain visible after the next
    # rerun (e.g. when "Save Model" is clicked) instead of disappearing.
    feature_names = vectorizer.get_feature_names_out()
    st.session_state.lda_topics = [
        f"Topic {topic_idx + 1}: "
        + ", ".join(
            feature_names[i]
            for i in topic.argsort()[::-1][:TOP_TERMS_PER_TOPIC]
        )
        for topic_idx, topic in enumerate(lda.components_)
    ]
    st.success("Training Completed!")

# Display Topics (of the most recently trained model, if any)
if st.session_state.lda_topics:
    st.write("### Identified Topics:")
    for topic in st.session_state.lda_topics:
        st.write(topic)

# Save the Trained Model
st.subheader("Save the Trained Model")
if st.button("Save Model"):
    # Explicit None check: do not rely on the truthiness of an estimator object.
    if st.session_state.lda_model is not None:
        # NOTE: only the LDA estimator is pickled; the CountVectorizer (and
        # therefore the vocabulary the model was fitted on) is not saved.
        try:
            with open(MODEL_PATH, "wb") as f:
                pickle.dump(st.session_state.lda_model, f)
        except OSError as err:
            # Report a failed write in the UI instead of a raw traceback.
            st.error(f"Could not save the model: {err}")
        else:
            st.success("Model saved as `lda_model.pkl`.")
    else:
        st.error("Please train the model first before saving.")
lda_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7e8868198c4e65547d5160c31ec2a311e0a482bc361339495e9b6ad78f48d10
3
+ size 8871
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ scikit-learn
3
+ transformers