Christopher Digno
committed on
Commit
·
15e365d
1
Parent(s):
c818632
Added application files
Browse files- .gitignore +3 -0
- app.py +161 -0
- model/config.json +61 -0
- model/model.safetensors +3 -0
- requirements.txt +81 -0
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +1 -0
- tokenizer/tokenizer.json +0 -0
- tokenizer/tokenizer_config.json +1 -0
- tokenizer/vocab.json +0 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
*.ipynb
|
2 |
+
coba.py
|
3 |
+
.venv/
|
app.py
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os.path
|
3 |
+
import re
|
4 |
+
import pandas as pd
|
5 |
+
from transformers import pipeline, Pipeline
|
6 |
+
from time import sleep
|
7 |
+
|
8 |
+
ID2LABEL = {
|
9 |
+
'LABEL_0': {
|
10 |
+
"Cause": "No Reason",
|
11 |
+
"description": "There is no reason that identifies the cause of mental disorder, or the text does not reflect a mental disorder",
|
12 |
+
},
|
13 |
+
'LABEL_1': {
|
14 |
+
"Cause": "Bias / Abuse",
|
15 |
+
"description": """
|
16 |
+
A strong inclination of the mind or a preconceived opinion about something or someone. To avoid someone intentionally,
|
17 |
+
or to prevent someone from taking part in the social activities of a group because they dislike the person or disapprove
|
18 |
+
of their activities. It includes body shaming, physical, sexual, or emotional abuse.
|
19 |
+
""",
|
20 |
+
},
|
21 |
+
'LABEL_2': {
|
22 |
+
"Cause": "Jobs and Career",
|
23 |
+
"description": """
|
24 |
+
Financial loss can have catastrophic effects on mental illness, relationships and even physical health. Poor, meaningless
|
25 |
+
and unmanageable education, unemployment, un-affordable home loans, poor financial advice, and losing a job are some of
|
26 |
+
the major concerns. It includes gossiping and/or social cliques, aggressive bullying behavior, poor communication
|
27 |
+
and unclear expectations, dictatorial management techniques that don’t embrace employee feedback. The educational problems
|
28 |
+
like picking up courses under some external pressure and poor grades are also part of this category.
|
29 |
+
""",
|
30 |
+
},
|
31 |
+
'LABEL_3': {
|
32 |
+
"Cause": "Medication",
|
33 |
+
"description": """
|
34 |
+
The general drugs and other antiviral drugs can increase the risk of depression. The habit of using substances and alcohols
|
35 |
+
can aggravate the problem of mental disorders. Moreover, medical problems like tumors, cancer, and other prolonged diseases
|
36 |
+
can boost the presence of mental depression.
|
37 |
+
""",
|
38 |
+
},
|
39 |
+
'LABEL_4': {
|
40 |
+
"Cause": "Relationships",
|
41 |
+
"description": """
|
42 |
+
When two people or a group of people fight, it may lead to a relationship or friendship drifting apart, for example, regular
|
43 |
+
fights, breakups, divorce, mistrust, jealousy, betrayal, difference in opinion, inconsistency, conflicts, bad company,
|
44 |
+
noncommitment, priority, envy. Problems like bad parenting and childhood trauma are also part of this category.
|
45 |
+
""",
|
46 |
+
},
|
47 |
+
'LABEL_5': {
|
48 |
+
"Cause": "Alienation",
|
49 |
+
"description": """
|
50 |
+
Alienation is the feeling of life being worthless even after doing everything. There may be indicators of meaninglessness,
|
51 |
+
loneliness, tired of daily routines, powerlessness, normlessness, isolation, and cultural estrangement.
|
52 |
+
""",
|
53 |
+
},
|
54 |
+
}
|
55 |
+
|
56 |
+
EXAMPLES = [
|
57 |
+
"""
|
58 |
+
Same dad, different day. I can't believe that my dad have no sense of humanity. He hit me in the head this morning.
|
59 |
+
I felt nauseous all day, including when I take my exam just before this. If I kill myself, I will make sure that my dad
|
60 |
+
got the most blame.
|
61 |
+
""",
|
62 |
+
"""
|
63 |
+
My boss laid me off today. He said that my company was downsizing. I don't believe his cr*p though, I think he just hates
|
64 |
+
me as the only women in my department. Now I don't know how to feed my four kids. I am officially, totally, completely, out
|
65 |
+
of money and will to live.
|
66 |
+
""",
|
67 |
+
"""
|
68 |
+
Last month, my doctor prescribed me some alprazolam to calm my nerves down. I churn through a bottle of the pill in a month.
|
69 |
+
My doctor today told me that I'm quite healthy to go without some calming drugs, but I just can't stop consuming them. I
|
70 |
+
just bought like 4 bottles of it through some shady middle-man and I'll go crazy if I went for half a day without
|
71 |
+
swallowing one.
|
72 |
+
""",
|
73 |
+
"""
|
74 |
+
My girlfriend dumped me because of some stupid nerdy dude at her office, my mother disowns me for not enlisting to the
|
75 |
+
millitary like my brother did. It all just keeps on pinning me to the ground. When I asked my friends to go out, they all
|
76 |
+
refused because my ex-girlfriend was spreading lies of how I cheated on her yada yada. I'm totally f*cked.
|
77 |
+
""",
|
78 |
+
"""
|
79 |
+
Everything is worthless, everything is meaningless. All the things that I do literally contribute nothing to the
|
80 |
+
society. I just want to go somewhere I can just lie down, sleep, eat, with no negative consequences for me.
|
81 |
+
""",
|
82 |
+
"""
|
83 |
+
Yesterday, I bought an ice cream for myself at the city. It was really good. I will definitely tell everyone
|
84 |
+
I know about this ice cream place.
|
85 |
+
""",
|
86 |
+
]
|
87 |
+
|
88 |
+
@st.cache_resource(show_spinner=False)
def load_model() -> Pipeline:
    """Build the text-classification pipeline used by the app.

    The model and the tokenizer are resolved independently: the bundled
    ``model`` / ``tokenizer`` directory is used when its main file exists
    on disk, otherwise the default identifier is passed through instead.

    Returns:
        The pipeline created by ``transformers.pipeline``.
    """
    default_id = "AIMH/mental-longformer-base-4096"
    has_local_model = os.path.isfile("model/model.safetensors")
    has_local_tokenizer = os.path.isfile("tokenizer/tokenizer.json")
    return pipeline(
        "text-classification",
        model="model" if has_local_model else default_id,
        tokenizer="tokenizer" if has_local_tokenizer else default_id,
    )
|
97 |
+
|
98 |
+
@st.cache_data(show_spinner=False)
def predict(text: str, _pipe: Pipeline):
    """Classify *text* and return every label ranked by confidence.

    Args:
        text: The text to analyze.
        _pipe: Text-classification pipeline to run. The leading underscore
            tells Streamlit not to hash this argument for the cache key.

    Returns:
        A ``pandas.DataFrame`` with one row per label, highest score first.
        Each row carries the ``Cause`` and ``description`` entries from
        ``ID2LABEL`` plus an integer ``Confidence`` percentage.
    """
    # top_k=None is the supported replacement for the deprecated
    # return_all_scores=True. truncation=True clips over-long input to the
    # tokenizer's maximum length instead of letting the model call fail.
    scores = _pipe(text, top_k=None, truncation=True)
    # Depending on the call shape, the per-label list may be nested one
    # level deeper; unwrap it so both shapes are handled.
    if scores and isinstance(scores[0], list):
        scores = scores[0]

    ranked = sorted(scores, key=lambda item: item["score"], reverse=True)
    rows = [
        {**ID2LABEL[item["label"]], "Confidence": round(item["score"] * 100)}
        for item in ranked
    ]
    return pd.DataFrame(rows)
|
107 |
+
|
108 |
+
|
109 |
+
if __name__ == "__main__":
|
110 |
+
st.markdown('<h1 align="center">✨ Depression Causal Analysis ✨</h1>', unsafe_allow_html=True, )
|
111 |
+
st.error("""
|
112 |
+
DISCLAIMER: This project was only intended for research showcase purposes only.
|
113 |
+
If you believe that you have mental health issues, please consult your physician.\n
|
114 |
+
This project also contains triggering example words from social media that might not
|
115 |
+
fit everybody. Continue with caution. Please love yourself and don't hesitate to reach
|
116 |
+
out for professional help. ❤
|
117 |
+
""")
|
118 |
+
|
119 |
+
left, center, right = st.columns(3)
|
120 |
+
left.link_button("Go to project on GitHub", "https://github.com/stackofsugar/", use_container_width=True)
|
121 |
+
center.link_button("Read the dataset's paper", "https://arxiv.org/abs/2207.04674v1", use_container_width=True)
|
122 |
+
right.link_button("Read the model's paper", "https://arxiv.org/abs/2304.10447v1", use_container_width=True)
|
123 |
+
|
124 |
+
example = st.selectbox("Load an example", EXAMPLES, index=None)
|
125 |
+
|
126 |
+
with st.form("main_prediction"):
|
127 |
+
text = st.text_area(
|
128 |
+
"Text to analyze (tip: this model is better for long texts)",
|
129 |
+
value=(re.sub(' +', ' ', example).strip().replace("\n", "") if example else ""),
|
130 |
+
height=200
|
131 |
+
)
|
132 |
+
|
133 |
+
left, middle, right = st.columns(3)
|
134 |
+
submitted = middle.form_submit_button("Predict!", use_container_width=True)
|
135 |
+
|
136 |
+
if submitted and text.strip():
    # Separate spinners let the user tell a slow model load apart from
    # the inference step itself.
    with st.spinner("Loading model..."):
        pipe = load_model()
    with st.spinner("Predicting..."):
        preds = predict(text.strip(), pipe)

    # Row 0 holds the highest-scoring label. Pulling the value out first
    # also avoids reusing double quotes inside a double-quoted f-string,
    # which is a SyntaxError on Python versions before 3.12.
    top_cause = preds.at[0, "Cause"]
    st.markdown(f"**Result: {top_cause}**")

    col1, col2 = st.columns([1, 2])
    col1.dataframe(
        preds[["Cause", "Confidence"]],
        column_config={
            "Confidence": st.column_config.NumberColumn(format="%d%%"),
        },
        hide_index=True,
        use_container_width=True,
    )

    col2.markdown(f"Explanation of **{top_cause}**:")
    col2.write(preds.at[0, "description"])
|
157 |
+
|
158 |
+
st.write(
|
159 |
+
"""Developed with :heart: by [stackofsugar](https://github.com/stackofsugar/). For more information on the
|
160 |
+
project, please visit the project's GitHub page with the button above."""
|
161 |
+
)
|
model/config.json
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "allenai/longformer-base-4096",
|
3 |
+
"architectures": [
|
4 |
+
"LongformerForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_mode": "longformer",
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"attention_window": [
|
9 |
+
512,
|
10 |
+
512,
|
11 |
+
512,
|
12 |
+
512,
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512,
|
20 |
+
512
|
21 |
+
],
|
22 |
+
"bos_token_id": 0,
|
23 |
+
"eos_token_id": 2,
|
24 |
+
"gradient_checkpointing": false,
|
25 |
+
"hidden_act": "gelu",
|
26 |
+
"hidden_dropout_prob": 0.1,
|
27 |
+
"hidden_size": 768,
|
28 |
+
"id2label": {
|
29 |
+
"0": "LABEL_0",
|
30 |
+
"1": "LABEL_1",
|
31 |
+
"2": "LABEL_2",
|
32 |
+
"3": "LABEL_3",
|
33 |
+
"4": "LABEL_4",
|
34 |
+
"5": "LABEL_5"
|
35 |
+
},
|
36 |
+
"ignore_attention_mask": false,
|
37 |
+
"initializer_range": 0.02,
|
38 |
+
"intermediate_size": 3072,
|
39 |
+
"label2id": {
|
40 |
+
"LABEL_0": 0,
|
41 |
+
"LABEL_1": 1,
|
42 |
+
"LABEL_2": 2,
|
43 |
+
"LABEL_3": 3,
|
44 |
+
"LABEL_4": 4,
|
45 |
+
"LABEL_5": 5
|
46 |
+
},
|
47 |
+
"layer_norm_eps": 1e-05,
|
48 |
+
"max_position_embeddings": 4098,
|
49 |
+
"model_type": "longformer",
|
50 |
+
"num_attention_heads": 12,
|
51 |
+
"num_hidden_layers": 12,
|
52 |
+
"onnx_export": false,
|
53 |
+
"pad_token_id": 1,
|
54 |
+
"position_embedding_type": "absolute",
|
55 |
+
"sep_token_id": 2,
|
56 |
+
"torch_dtype": "float32",
|
57 |
+
"transformers_version": "4.44.2",
|
58 |
+
"type_vocab_size": 1,
|
59 |
+
"use_cache": true,
|
60 |
+
"vocab_size": 50265
|
61 |
+
}
|
model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3da077d487c964259ec9f47b5b58f39b9db45f2964e3763e3fc91695ddb0728b
|
3 |
+
size 594690488
|
requirements.txt
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate==1.0.1
|
2 |
+
altair==5.4.1
|
3 |
+
asttokens==2.4.1
|
4 |
+
attrs==24.2.0
|
5 |
+
blinker==1.8.2
|
6 |
+
cachetools==5.5.0
|
7 |
+
certifi==2024.8.30
|
8 |
+
charset-normalizer==3.4.0
|
9 |
+
click==8.1.7
|
10 |
+
colorama==0.4.6
|
11 |
+
comm==0.2.2
|
12 |
+
debugpy==1.8.7
|
13 |
+
decorator==5.1.1
|
14 |
+
executing==2.1.0
|
15 |
+
filelock==3.16.1
|
16 |
+
fsspec==2024.10.0
|
17 |
+
gitdb==4.0.11
|
18 |
+
GitPython==3.1.43
|
19 |
+
huggingface-hub==0.26.2
|
20 |
+
idna==3.10
|
21 |
+
inquirerpy==0.3.4
|
22 |
+
ipykernel==6.29.5
|
23 |
+
ipython==8.29.0
|
24 |
+
jedi==0.19.1
|
25 |
+
Jinja2==3.1.4
|
26 |
+
jsonschema==4.23.0
|
27 |
+
jsonschema-specifications==2024.10.1
|
28 |
+
jupyter_client==8.6.3
|
29 |
+
jupyter_core==5.7.2
|
30 |
+
markdown-it-py==3.0.0
|
31 |
+
MarkupSafe==3.0.2
|
32 |
+
matplotlib-inline==0.1.7
|
33 |
+
mdurl==0.1.2
|
34 |
+
mpmath==1.3.0
|
35 |
+
narwhals==1.12.1
|
36 |
+
nest-asyncio==1.6.0
|
37 |
+
networkx==3.4.2
|
38 |
+
numpy==2.1.2
|
39 |
+
packaging==24.1
|
40 |
+
pandas==2.2.3
|
41 |
+
parso==0.8.4
|
42 |
+
pfzy==0.3.4
|
43 |
+
pillow==10.4.0
|
44 |
+
platformdirs==4.3.6
|
45 |
+
prompt_toolkit==3.0.48
|
46 |
+
protobuf==5.28.3
|
47 |
+
psutil==6.1.0
|
48 |
+
pure_eval==0.2.3
|
49 |
+
pyarrow==18.0.0
|
50 |
+
pydeck==0.9.1
|
51 |
+
Pygments==2.18.0
|
52 |
+
python-dateutil==2.9.0.post0
|
53 |
+
pytz==2024.2
|
54 |
+
# pywin32 is only published for Windows; the marker lets installs on other platforms skip it.
pywin32==308; sys_platform == "win32"
|
55 |
+
PyYAML==6.0.2
|
56 |
+
pyzmq==26.2.0
|
57 |
+
referencing==0.35.1
|
58 |
+
regex==2024.9.11
|
59 |
+
requests==2.32.3
|
60 |
+
rich==13.9.3
|
61 |
+
rpds-py==0.20.0
|
62 |
+
safetensors==0.4.5
|
63 |
+
setuptools==75.3.0
|
64 |
+
six==1.16.0
|
65 |
+
smmap==5.0.1
|
66 |
+
stack-data==0.6.3
|
67 |
+
streamlit==1.39.0
|
68 |
+
sympy==1.13.1
|
69 |
+
tenacity==9.0.0
|
70 |
+
tokenizers==0.20.1
|
71 |
+
toml==0.10.2
|
72 |
+
torch==2.5.1
|
73 |
+
tornado==6.4.1
|
74 |
+
tqdm==4.66.6
|
75 |
+
traitlets==5.14.3
|
76 |
+
transformers==4.46.1
|
77 |
+
typing_extensions==4.12.2
|
78 |
+
tzdata==2024.2
|
79 |
+
urllib3==2.2.3
|
80 |
+
watchdog==5.0.3
|
81 |
+
wcwidth==0.2.13
|
tokenizer/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
|
tokenizer/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 4096, "special_tokens_map_file": null, "name_or_path": "allenai/longformer-base-4096", "tokenizer_class": "LongformerTokenizer"}
|
tokenizer/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|