Spaces:

kowsiknd
/

Twitter-Sentiment-Analysis-using-BERT-model

Build error

App Files Files Community

kowsiknd committed on Apr 20, 2023

Commit

b3aa083

1 Parent(s): c0cd823

update

Browse files

Files changed (16) hide show

.gitattributes +1 -0
.gitignore +2 -0
app.py +38 -0
best_model_state.bin +3 -0
classifier.py +50 -0
images/EDA1.png +3 -0
images/EDA2.png +3 -0
images/EDA3-votes.png +3 -0
images/Input pipeline.png +3 -0
images/confusion_matrix.png +3 -0
images/model_on_cpu.png +3 -0
images/model_on_gpu.png +3 -0
images/readme.md +1 -0
images/train_test_scores.png +3 -0
images/train_val_accuracy.png +3 -0
requirements.txt +91 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ env/
2	+ __pycache__/

app.py ADDED Viewed

	@@ -0,0 +1,38 @@

+from classifier import classify
+from PIL  import Image
+import streamlit as st
+st.title("Twitter Sentiment Analysis using BERT model")
+st.subheader("Motivation")
+st.markdown("""
+Cyberbullying is a serious problem in today's world. It is a form of bullying that takes place using electronic technology. This model will act as an tool for the detection of the abusive content
+in the tweets. This model can be used by the social media platforms to detect the abusive content in the tweets and take necessary action.
+Huggingface provides an easy interfce to test the models before the use.
+""")
+text = st.text_input("Enter a tweet to classify it as either Normal or Abusive. (Press enter to submit)",
+              value="I love DCNM course", max_chars=512, key=None, type="default",
+              help=None, autocomplete=None)
+st.markdown(f"The tweet is classified as: **{classify(text)}**")
+st.markdown("Try out for abusive _Giving and taking dowry is crappy thing_")
+st.subheader("About the model")
+st.markdown("""
+Model was trained on twitter dataset ENCASEH2020 from Founta, A.M et. al. (2018) [3]. BERT Tiny model [1][2][5] was chosen for this project because, empirically,
+giving better result with least number of parameters. The model was trained for 10 epochs with batch size of 32 and AdamW optimizer with learning rate of 1e-2 and loss as cross entropy.
+""")
+st.image("./images/train_val_accuracy.png [4]", caption="Train and Validation Accuracy", use_column_width=True)
+st.image("./images/train_test_scores.png [4]", caption="Classification Report", use_column_width=True)
+st.image("./images/confusion_matrix.png [4]", caption="Confusion Matrix", use_column_width=True)
+st.subheader("References")
+st.markdown("1. [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805)")
+st.markdown("2. [BERT-Tiny: A Tiny BERT for Natural Language Understanding](https://arxiv.org/abs/1909.10351)")
+st.markdown("3. [Founta, A.M., Djouvas, C., Chatzakou, D., Leontiadis, I., Blackburn, J., Stringhini, G., Vakali, A., Sirivianos, M., & Kourtellis, N. (2018).Large Scale Crowdsourcing and Characterization of Twitter Abusive Behavior. In 11th International Conference on Web and Social Media, ICWSM 2018.](https://arxiv.org/abs/1802.00393)")
+st.markdown("4. [Ajay S, Ram, Kowsik N D, Navaneeth D, Amarnath C N, Cyberbullying Detection using Bidirectional Encoder Representation from Transformers 2022](https://github.com/Cubemet/bert-models)")
+st.markdown("5. [Base Model from nreimers](https://huggingface.co/nreimers/BERT-Tiny_L-2_H-128_A-2")

best_model_state.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6cfb5ee667393eef14cb0bef4b8193a0b1690e743ebf4c000f57fce39943542
+size 17564519

classifier.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import torch
+import transformers
+from transformers import BertModel, BertTokenizer, AutoTokenizer
+from torch import nn, optim
+from torch.utils.data import Dataset, DataLoader
+import torch.nn.functional as F
+###########################################################
+# Sample input text. classify() takes its own `review_text` parameter, so this
+# module-level value is not read by the code visible in this file.
+review_text = "I love you"
+###########################################################
+# Hugging Face model id used for both the tokenizer and the BERT encoder.
+PRE_TRAINED_MODEL_NAME = 'nreimers/BERT-Tiny_L-2_H-128_A-2'
+# Output labels, indexed by the predicted class index (0 -> "Normal", 1 -> "Abusive").
+class_names = ["Normal", "Abusive"]
+# NOTE: despite the name this is not a number; it is the string passed as the
+# tokenizer's `padding` argument in classify().
+MAX_LEN = "max_length"
+class CyberbullyingClassifier(nn.Module):
+  def __init__(self, n_classes):
+    super(CyberbullyingClassifier, self).__init__()
+    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME).to("cpu")
+    # self.drop = nn.Dropout(p=0.3)
+    self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
+  def forward(self, input_ids, attention_mask):
+    bert_out = self.bert(
+      input_ids=input_ids,
+      attention_mask=attention_mask
+    )
+    pooled_output = bert_out[1]
+    # output = self.drop(pooled_output)
+    return self.out(pooled_output)
+tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
+model = CyberbullyingClassifier(2)
+model.load_state_dict(torch.load('./best_model_state.bin', map_location=torch.device('cpu')))
+def classify(review_text):
+    encoded_review = tokenizer(review_text, padding=MAX_LEN, truncation=True, return_tensors="pt")
+    input_ids = encoded_review['input_ids'].to('cpu')
+    attention_mask = encoded_review['attention_mask'].to('cpu')
+    output = model(input_ids, attention_mask)
+    _, prediction = torch.max(output, dim=1)
+    print(f'Review text: {review_text}')
+    print(f'Sentiment  : {class_names[prediction]}')
+    return class_names[prediction]

images/EDA1.png ADDED Viewed

Git LFS Details

SHA256: d1171dcba646351202c502967861c73baf299702adf99120260fce092a1d06ab
Pointer size: 129 Bytes
Size of remote file: 7.8 kB

images/EDA2.png ADDED Viewed

Git LFS Details

SHA256: b889df578537100a41d6f2cf72cf953160df5b8226f861650052e2f84d513ac5
Pointer size: 130 Bytes
Size of remote file: 14.1 kB

images/EDA3-votes.png ADDED Viewed

Git LFS Details

SHA256: 562506338bf3ff93e29aa9ab0b9d6aa02339abd0db50b91039876d15a322493f
Pointer size: 130 Bytes
Size of remote file: 10.1 kB

images/Input pipeline.png ADDED Viewed

Git LFS Details

SHA256: 75b0e2cacee2d74fd880e6952db989f84d42c98e6f0324d371a8e69ce6c5bc8b
Pointer size: 130 Bytes
Size of remote file: 40.4 kB

images/confusion_matrix.png ADDED Viewed

Git LFS Details

SHA256: 0ce8decc226ee8050fba97b9c8d9b47c0e1cc606048d8801eb80d253b1881ec2
Pointer size: 130 Bytes
Size of remote file: 21.1 kB

images/model_on_cpu.png ADDED Viewed

Git LFS Details

SHA256: 6bcd5f3f00c4c069253a4b48db70f40f32bf7d9b7255692819c6c944f972aab0
Pointer size: 130 Bytes
Size of remote file: 17.6 kB

images/model_on_gpu.png ADDED Viewed

Git LFS Details

SHA256: 7a9311182c5a49ff65592f9bf682d39a92c3b96833331907025ee4033d125832
Pointer size: 130 Bytes
Size of remote file: 17.3 kB

images/readme.md ADDED Viewed

	@@ -0,0 +1 @@


1	+

images/train_test_scores.png ADDED Viewed

Git LFS Details

SHA256: 3925e4c197a742aec592d1694276aab1cf0af286769f990591a18b1e5383fe7d
Pointer size: 130 Bytes
Size of remote file: 25.7 kB

images/train_val_accuracy.png ADDED Viewed

Git LFS Details

SHA256: 8d9facd04d23fb1a72986e592b9776c68ca9dec2f968c2b92d34526abda230b0
Pointer size: 130 Bytes
Size of remote file: 32 kB

requirements.txt ADDED Viewed

	@@ -0,0 +1,91 @@

+aiohttp==3.8.4
+aiosignal==1.3.1
+altair==4.2.2
+asgiref==3.6.0
+async-timeout==4.0.2
+attrs==23.1.0
+backports.zoneinfo==0.2.1
+blinker==1.6.2
+cachetools==5.3.0
+certifi==2022.12.7
+charset-normalizer==3.1.0
+click==8.1.3
+cmake==3.26.3
+datasets==2.11.0
+decorator==5.1.1
+dill==0.3.6
+Django==4.2
+entrypoints==0.4
+filelock==3.12.0
+frozenlist==1.3.3
+fsspec==2023.4.0
+gitdb==4.0.10
+GitPython==3.1.31
+huggingface-hub==0.13.4
+idna==3.4
+importlib-metadata==6.5.0
+importlib-resources==5.12.0
+Jinja2==3.1.2
+jsonschema==4.17.3
+lit==16.0.1
+markdown-it-py==2.2.0
+MarkupSafe==2.1.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.4
+multiprocess==0.70.14
+networkx==3.1
+numpy==1.24.2
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+packaging==23.1
+pandas==1.5.3
+Pillow==9.5.0
+pkgutil-resolve-name==1.3.10
+protobuf==3.20.3
+pyarrow==11.0.0
+pydeck==0.8.1b0
+Pygments==2.15.1
+Pympler==1.0.1
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+pytz==2023.3
+pytz-deprecation-shim==0.1.0.post0
+PyYAML==6.0
+regex==2023.3.23
+requests==2.28.2
+responses==0.18.0
+rich==13.3.4
+six==1.16.0
+smmap==5.0.0
+sqlparse==0.4.4
+streamlit==1.21.0
+sympy==1.11.1
+tokenizers==0.13.3
+toml==0.10.2
+toolz==0.12.0
+torch==2.0.0
+torchaudio==2.0.1
+torchvision==0.15.1
+tornado==6.3
+tqdm==4.65.0
+transformers==4.28.1
+triton==2.0.0
+typing-extensions==4.5.0
+tzdata==2023.3
+tzlocal==4.3
+urllib3==1.26.15
+validators==0.20.0
+watchdog==3.0.0
+xxhash==3.2.0
+yarl==1.8.2
+zipp==3.15.0