kardosdrur committed on
Commit
9fc7514
·
1 Parent(s): 08620e1

Added TopicData objects

Browse files
Files changed (4) hide show
  1. Dockerfile +3 -3
  2. keynmf_data.joblib +3 -0
  3. main.py +3 -26
  4. top2vec_data.joblib +3 -0
Dockerfile CHANGED
@@ -8,8 +8,7 @@ RUN apt install -y git
8
 
9
  RUN pip install gunicorn==20.1.0
10
  RUN pip install typing-extensions
11
- RUN pip install topic-wizard
12
- RUN pip install "turftopic>=0.13.0"
13
 
14
  RUN useradd -m -u 1000 user
15
  # Switch to the "user" user
@@ -32,6 +31,7 @@ RUN git clone https://github.com/x-tabdeveloping/topicwizard
32
  WORKDIR $HOME/app/topicwizard
33
  RUN git checkout topic-arena
34
  RUN cp $HOME/app/main.py $HOME/app/topicwizard/main.py
35
- RUN cp $HOME/app/corpus.txt $HOME/app/topicwizard/corpus.txt
 
36
  EXPOSE 7860
37
  CMD gunicorn --timeout 0 -b 0.0.0.0:7860 --workers=2 --threads=4 --worker-class=gthread main:server
 
8
 
9
  RUN pip install gunicorn==20.1.0
10
  RUN pip install typing-extensions
11
+ RUN pip install "turftopic[topic-wizard]"
 
12
 
13
  RUN useradd -m -u 1000 user
14
  # Switch to the "user" user
 
31
  WORKDIR $HOME/app/topicwizard
32
  RUN git checkout topic-arena
33
  RUN cp $HOME/app/main.py $HOME/app/topicwizard/main.py
34
+ RUN cp $HOME/app/keynmf_data.joblib $HOME/app/topicwizard/keynmf_data.joblib
35
+ RUN cp $HOME/app/top2vec_data.joblib $HOME/app/topicwizard/top2vec_data.joblib
36
  EXPOSE 7860
37
  CMD gunicorn --timeout 0 -b 0.0.0.0:7860 --workers=2 --threads=4 --worker-class=gthread main:server
keynmf_data.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08ffa99e4c964c6874e7f7ecb75dadb3caf63ea3a6f8a639c117c7df9864c07f
3
+ size 147687747
main.py CHANGED
@@ -1,14 +1,10 @@
1
  import dash_mantine_components as dmc
2
- import joblib
3
- import numpy as np
4
  from dash_extensions.enrich import (Dash, DashBlueprint, Input, Output, State,
5
  dcc, exceptions, html)
6
- from sentence_transformers import SentenceTransformer
7
- from sklearn.datasets import fetch_20newsgroups
8
  from topicwizard.widgets import (ConceptClusters, DocumentClusters,
9
  TopicBrowser, TopicHierarchy,
10
  create_widget_container)
11
- from turftopic import ClusteringTopicModel, KeyNMF
12
 
13
 
14
  def create_app(blueprint):
@@ -29,27 +25,8 @@ def create_app(blueprint):
29
  return app
30
 
31
 
32
- with open("corpus.txt") as in_file:
33
- corpus = in_file.read().split("\n")
34
-
35
- print("Calculating embeddings")
36
- encoder = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")
37
- embeddings = encoder.encode(corpus, show_progress_bar=True)
38
-
39
- print("Fitting keynmf")
40
- keynmf = KeyNMF(5, encoder=encoder, random_state=42)
41
- keynmf_data = keynmf.prepare_topic_data(corpus, embeddings=embeddings)
42
- keynmf_data.hierarchy.divide_children(5)
43
-
44
- print("Fitting top2vec")
45
- top2vec = ClusteringTopicModel(
46
- n_reduce_to=5,
47
- feature_importance="centroid",
48
- encoder=encoder,
49
- random_state=0,
50
- )
51
- top2vec_data = top2vec.prepare_topic_data(corpus, embeddings=embeddings)
52
-
53
  print("Building blueprints.")
54
  keynmf_blueprint = create_widget_container(
55
  [TopicBrowser(), ConceptClusters(), TopicHierarchy()],
 
1
  import dash_mantine_components as dmc
 
 
2
  from dash_extensions.enrich import (Dash, DashBlueprint, Input, Output, State,
3
  dcc, exceptions, html)
 
 
4
  from topicwizard.widgets import (ConceptClusters, DocumentClusters,
5
  TopicBrowser, TopicHierarchy,
6
  create_widget_container)
7
+ from turftopic.data import TopicData
8
 
9
 
10
  def create_app(blueprint):
 
25
  return app
26
 
27
 
28
+ keynmf_data = TopicData.from_disk("keynmf_data.joblib")
29
+ top2vec_data = TopicData.from_disk("top2vec_data.joblib")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  print("Building blueprints.")
31
  keynmf_blueprint = create_widget_container(
32
  [TopicBrowser(), ConceptClusters(), TopicHierarchy()],
top2vec_data.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4529b2dc32c6e1b87fc8c24367fbbc20f901b60296d28feef50dbe175913a03a
3
+ size 177605661