Spaces:
Sleeping
Sleeping
Commit
·
9fc7514
1
Parent(s):
08620e1
Added TopicData objects
Browse files
- Dockerfile +3 -3
- keynmf_data.joblib +3 -0
- main.py +3 -26
- top2vec_data.joblib +3 -0
Dockerfile
CHANGED
@@ -8,8 +8,7 @@ RUN apt install -y git
|
|
8 |
|
9 |
RUN pip install gunicorn==20.1.0
|
10 |
RUN pip install typing-extensions
|
11 |
-
RUN pip install topic-wizard
|
12 |
-
RUN pip install "turftopic>=0.13.0"
|
13 |
|
14 |
RUN useradd -m -u 1000 user
|
15 |
# Switch to the "user" user
|
@@ -32,6 +31,7 @@ RUN git clone https://github.com/x-tabdeveloping/topicwizard
|
|
32 |
WORKDIR $HOME/app/topicwizard
|
33 |
RUN git checkout topic-arena
|
34 |
RUN cp $HOME/app/main.py $HOME/app/topicwizard/main.py
|
35 |
-
RUN cp $HOME/app/
|
|
|
36 |
EXPOSE 7860
|
37 |
CMD gunicorn --timeout 0 -b 0.0.0.0:7860 --workers=2 --threads=4 --worker-class=gthread main:server
|
|
|
8 |
|
9 |
RUN pip install gunicorn==20.1.0
|
10 |
RUN pip install typing-extensions
|
11 |
+
RUN pip install "turftopic[topic-wizard]"
|
|
|
12 |
|
13 |
RUN useradd -m -u 1000 user
|
14 |
# Switch to the "user" user
|
|
|
31 |
WORKDIR $HOME/app/topicwizard
|
32 |
RUN git checkout topic-arena
|
33 |
RUN cp $HOME/app/main.py $HOME/app/topicwizard/main.py
|
34 |
+
RUN cp $HOME/app/keynmf_data.joblib $HOME/app/topicwizard/keynmf_data.joblib
|
35 |
+
RUN cp $HOME/app/top2vec_data.joblib $HOME/app/topicwizard/top2vec_data.joblib
|
36 |
EXPOSE 7860
|
37 |
CMD gunicorn --timeout 0 -b 0.0.0.0:7860 --workers=2 --threads=4 --worker-class=gthread main:server
|
keynmf_data.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08ffa99e4c964c6874e7f7ecb75dadb3caf63ea3a6f8a639c117c7df9864c07f
|
3 |
+
size 147687747
|
main.py
CHANGED
@@ -1,14 +1,10 @@
|
|
1 |
import dash_mantine_components as dmc
|
2 |
-
import joblib
|
3 |
-
import numpy as np
|
4 |
from dash_extensions.enrich import (Dash, DashBlueprint, Input, Output, State,
|
5 |
dcc, exceptions, html)
|
6 |
-
from sentence_transformers import SentenceTransformer
|
7 |
-
from sklearn.datasets import fetch_20newsgroups
|
8 |
from topicwizard.widgets import (ConceptClusters, DocumentClusters,
|
9 |
TopicBrowser, TopicHierarchy,
|
10 |
create_widget_container)
|
11 |
-
from turftopic import ClusteringTopicModel, KeyNMF
|
12 |
|
13 |
|
14 |
def create_app(blueprint):
|
@@ -29,27 +25,8 @@ def create_app(blueprint):
|
|
29 |
return app
|
30 |
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
print("Calculating embeddings")
|
36 |
-
encoder = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")
|
37 |
-
embeddings = encoder.encode(corpus, show_progress_bar=True)
|
38 |
-
|
39 |
-
print("Fitting keynmf")
|
40 |
-
keynmf = KeyNMF(5, encoder=encoder, random_state=42)
|
41 |
-
keynmf_data = keynmf.prepare_topic_data(corpus, embeddings=embeddings)
|
42 |
-
keynmf_data.hierarchy.divide_children(5)
|
43 |
-
|
44 |
-
print("Fitting top2vec")
|
45 |
-
top2vec = ClusteringTopicModel(
|
46 |
-
n_reduce_to=5,
|
47 |
-
feature_importance="centroid",
|
48 |
-
encoder=encoder,
|
49 |
-
random_state=0,
|
50 |
-
)
|
51 |
-
top2vec_data = top2vec.prepare_topic_data(corpus, embeddings=embeddings)
|
52 |
-
|
53 |
print("Building blueprints.")
|
54 |
keynmf_blueprint = create_widget_container(
|
55 |
[TopicBrowser(), ConceptClusters(), TopicHierarchy()],
|
|
|
1 |
import dash_mantine_components as dmc
|
|
|
|
|
2 |
from dash_extensions.enrich import (Dash, DashBlueprint, Input, Output, State,
|
3 |
dcc, exceptions, html)
|
|
|
|
|
4 |
from topicwizard.widgets import (ConceptClusters, DocumentClusters,
|
5 |
TopicBrowser, TopicHierarchy,
|
6 |
create_widget_container)
|
7 |
+
from turftopic.data import TopicData
|
8 |
|
9 |
|
10 |
def create_app(blueprint):
|
|
|
25 |
return app
|
26 |
|
27 |
|
28 |
+
keynmf_data = TopicData.from_disk("keynmf_data.joblib")
|
29 |
+
top2vec_data = TopicData.from_disk("top2vec_data.joblib")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
print("Building blueprints.")
|
31 |
keynmf_blueprint = create_widget_container(
|
32 |
[TopicBrowser(), ConceptClusters(), TopicHierarchy()],
|
top2vec_data.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4529b2dc32c6e1b87fc8c24367fbbc20f901b60296d28feef50dbe175913a03a
|
3 |
+
size 177605661
|