Spaces:

JEPHONETORRE
/

text_analysis_app.py

Running

App Files Files Community

JEPHONETORRE committed on Mar 4

Commit

1b3143f

1 Parent(s): 0fa3c25

1

Browse files

Files changed (3) hide show

app.py +68 -0
lda_model.pkl +3 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import streamlit as st
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.decomposition import LatentDirichletAllocation
+import pickle
+# Title
+st.title("Unsupervised Text Analysis App with Training")
+st.subheader("Train an LDA Model for Topic Modeling")
+# Initialize Session State
+if "lda_model" not in st.session_state:
+    st.session_state.lda_model = None
+# Built-in Dataset
+st.write("### Dataset:")
+texts = [
+    "The economy is experiencing significant growth this year.",
+    "Climate change is one of the most pressing global challenges.",
+    "Artificial intelligence is transforming industries worldwide.",
+    "Renewable energy sources are becoming more popular and cost-effective.",
+    "Sports events bring people together and promote cultural exchange.",
+    "Advances in medicine have greatly improved life expectancy.",
+    "Education plays a critical role in shaping the future of societies.",
+    "Travel and tourism contribute significantly to the global economy.",
+    "Space exploration inspires innovation and collaboration.",
+    "Social media platforms influence public opinion and behavior."
+]
+# Display dataset
+st.write(texts)
+# Input: Number of Topics
+st.subheader("Training Parameters")
+num_topics = st.slider("Select the number of topics for training", 2, 10, 3)
+# Vectorization
+vectorizer = CountVectorizer(stop_words="english", max_features=1000)
+doc_term_matrix = vectorizer.fit_transform(texts)
+# Train LDA Model
+st.subheader("Training the LDA Model")
+if st.button("Train Model"):
+    with st.spinner("Training the LDA model..."):
+        lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
+        lda.fit(doc_term_matrix)
+        st.session_state.lda_model = lda  # Save the trained model in session state
+    # Display Topics
+    st.success("Training Completed!")
+    feature_names = vectorizer.get_feature_names_out()
+    topics = []
+    for topic_idx, topic in enumerate(lda.components_):
+        top_features = [feature_names[i] for i in topic.argsort()[:-6:-1]]
+        topics.append(f"Topic {topic_idx + 1}: {', '.join(top_features)}")
+    st.write("### Identified Topics:")
+    for topic in topics:
+        st.write(topic)
+# Save the Trained Model
+st.subheader("Save the Trained Model")
+if st.button("Save Model"):
+    if st.session_state.lda_model:
+        with open("lda_model.pkl", "wb") as f:
+            pickle.dump(st.session_state.lda_model, f)
+        st.success("Model saved as `lda_model.pkl`.")
+    else:
+        st.error("Please train the model first before saving.")

lda_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7e8868198c4e65547d5160c31ec2a311e0a482bc361339495e9b6ad78f48d10
+size 8871

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+streamlit
+scikit-learn
+transformers