Spaces:
Running
Running
Commit
·
83c0537
1
Parent(s):
b4a4d6e
Improved functionality
Browse files- gradio_app.py +99 -134
- model/analyzer.py +224 -197
gradio_app.py
CHANGED
@@ -1,172 +1,137 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
-
import time
|
3 |
from model.analyzer import analyze_content
|
|
|
4 |
|
5 |
-
# Custom CSS for
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
border-radius:
|
13 |
-
box-shadow: 0
|
14 |
-
font-family: 'Nunito', sans-serif;
|
15 |
}
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
color: #
|
20 |
-
|
21 |
font-weight: bold;
|
22 |
-
margin-bottom: 0.5rem;
|
23 |
-
text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
|
24 |
}
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
color: #
|
29 |
-
font-size: 1.2rem;
|
30 |
-
margin-bottom: 2rem;
|
31 |
}
|
32 |
|
33 |
-
|
34 |
-
color: #
|
35 |
font-weight: bold;
|
36 |
}
|
37 |
|
38 |
.content-box {
|
39 |
-
background: rgba(255,255,255,0.9);
|
40 |
border-radius: 15px;
|
41 |
-
|
42 |
-
|
|
|
43 |
}
|
44 |
|
45 |
-
.
|
46 |
-
background:
|
47 |
-
border:
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
font-weight: bold !important;
|
52 |
-
padding: 0.8rem 2rem !important;
|
53 |
-
transition: transform 0.2s !important;
|
54 |
}
|
55 |
|
56 |
-
.
|
57 |
-
|
58 |
}
|
59 |
|
60 |
-
.
|
61 |
-
background:
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
66 |
}
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
background: linear-gradient(90deg, #FF69B4, #87CEEB);
|
71 |
-
border-radius: 3px;
|
72 |
-
transition: width 0.3s ease;
|
73 |
}
|
74 |
"""
|
75 |
|
76 |
-
def analyze_with_loading(text):
|
77 |
-
#
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
81 |
|
82 |
-
# Perform
|
|
|
83 |
result = analyze_content(text)
|
84 |
|
85 |
-
#
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
triggers_text = ", ".join(result["detected_triggers"])
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
<p><strong>Analysis Time:</strong> {result['analysis_timestamp']}</p>
|
99 |
-
</div>
|
100 |
-
"""
|
101 |
-
}
|
102 |
|
103 |
-
with gr.Blocks(css=
|
104 |
-
|
|
|
105 |
gr.HTML("""
|
106 |
-
<
|
107 |
-
<
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
""")
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
analyze_btn = gr.Button(
|
122 |
-
"🍬 Analyze Content",
|
123 |
-
elem_classes=["analyze-button"]
|
124 |
-
)
|
125 |
-
|
126 |
-
progress = gr.Number(
|
127 |
-
value=0,
|
128 |
-
visible=False,
|
129 |
-
elem_id="progress-value"
|
130 |
)
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
document.getElementById('loading-bar').style.width = progress + '%';
|
145 |
-
}
|
146 |
-
|
147 |
-
// Watch for changes to the progress value
|
148 |
-
const observer = new MutationObserver((mutations) => {
|
149 |
-
mutations.forEach((mutation) => {
|
150 |
-
if (mutation.type === 'attributes' && mutation.attributeName === 'value') {
|
151 |
-
const progress = document.getElementById('progress-value').value;
|
152 |
-
updateLoadingBar(progress);
|
153 |
-
}
|
154 |
-
});
|
155 |
-
});
|
156 |
-
|
157 |
-
// Start observing the progress value element
|
158 |
-
observer.observe(document.getElementById('progress-value'), {
|
159 |
-
attributes: true
|
160 |
-
});
|
161 |
-
</script>
|
162 |
-
""")
|
163 |
-
|
164 |
-
analyze_btn.click(
|
165 |
-
fn=analyze_with_loading,
|
166 |
-
inputs=[text_input],
|
167 |
-
outputs=[gr.State({"progress": 0, "result": ""}), output],
|
168 |
-
show_progress=False
|
169 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
|
171 |
if __name__ == "__main__":
|
172 |
iface.launch()
|
|
|
1 |
+
# gradio_app.py
import gradio as gr
from model.analyzer import analyze_content
import time

# Custom CSS for styling the TREAT interface (title card, content/result
# boxes, and the analyze button). Passed to gr.Blocks(css=...) below.
custom_css = """
.treat-title {
    text-align: center;
    padding: 20px;
    margin-bottom: 20px;
    background: linear-gradient(135deg, #fce4ec 0%, #e3f2fd 100%);
    border-radius: 15px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.treat-title h1 {
    font-size: 3em;
    color: #d81b60;
    margin-bottom: 10px;
    font-weight: bold;
}

.treat-title p {
    font-size: 1.2em;
    color: #5c6bc0;
}

.highlight {
    color: #d81b60;
    font-weight: bold;
}

.content-box {
    background: rgba(255, 255, 255, 0.9);
    border-radius: 15px;
    padding: 20px;
    margin: 20px 0;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.results-box {
    background: rgba(255, 255, 255, 0.9);
    border-radius: 15px;
    padding: 20px;
    margin-top: 20px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.gradio-container {
    background: linear-gradient(135deg, #fce4ec 0%, #e3f2fd 100%);
}

.analyze-button {
    background-color: #d81b60 !important;
    color: white !important;
    border-radius: 25px !important;
    padding: 10px 20px !important;
    font-size: 1.1em !important;
    transition: transform 0.2s !important;
}

.analyze-button:hover {
    transform: scale(1.05) !important;
}
"""
|
67 |
|
68 |
+
def analyze_with_loading(text, progress=gr.Progress()):
    """Run the trigger analysis on *text* while driving a progress bar.

    Parameters
    ----------
    text : str
        The content/script to analyze.
    progress : gr.Progress
        Gradio progress tracker (injected per call by Gradio).

    Returns
    -------
    str
        A human-readable summary of the detected triggers.
    """
    import asyncio  # local import: only needed to bridge the async analyzer

    # Initialize progress
    progress(0, desc="Starting analysis...")

    # Simulate initial loading (model preparation)
    for i in range(30):
        time.sleep(0.1)  # short sleeps keep the bar responsive
        progress((i + 1) / 100)

    # Perform actual analysis
    progress(0.3, desc="Processing text...")
    result = analyze_content(text)
    # FIX: model.analyzer.analyze_content is declared `async def`; calling it
    # from this synchronous handler returns a coroutine, and subscripting the
    # coroutine below would raise. Drive it to completion first.
    if asyncio.iscoroutine(result):
        result = asyncio.run(result)

    # Simulate final processing
    for i in range(70, 100):
        time.sleep(0.05)
        progress((i + 1) / 100)

    # Format the results for display
    triggers = result["detected_triggers"]
    if triggers == ["None"]:
        return "No triggers detected in the content."
    trigger_list = "\n".join(f"• {trigger}" for trigger in triggers)
    return f"Triggers Detected:\n{trigger_list}"
|
|
|
|
|
|
|
|
|
93 |
|
94 |
+
# Assemble the TREAT UI: title card, input box, analyze button, results box.
with gr.Blocks(css=custom_css) as iface:
    # Title section
    with gr.Box(elem_classes="treat-title"):
        gr.HTML("""
        <h1>TREAT</h1>
        <p><span class="highlight">T</span>rigger
        <span class="highlight">R</span>ecognition for
        <span class="highlight">E</span>njoyable and
        <span class="highlight">A</span>ppropriate
        <span class="highlight">T</span>elevision</p>
        """)

    # Content input section
    with gr.Box(elem_classes="content-box"):
        input_text = gr.Textbox(
            label="Content to Analyze",
            placeholder="Paste your content here...",
            lines=8
        )

    # Analysis button
    analyze_btn = gr.Button(
        "Analyze Content",
        elem_classes="analyze-button"
    )

    # Results section
    with gr.Box(elem_classes="results-box"):
        output_text = gr.Textbox(
            label="Analysis Results",
            lines=5,
            # FIX: gr.Textbox has no `readonly` parameter (it raises a
            # TypeError); the supported way to make the box non-editable
            # is interactive=False.
            interactive=False
        )

    # Wire the button to the analysis handler.
    analyze_btn.click(
        fn=analyze_with_loading,
        inputs=[input_text],
        outputs=[output_text],
        api_name="analyze"
    )
|
135 |
|
136 |
if __name__ == "__main__":
    # Launch the TREAT UI when this file is executed directly.
    iface.launch()
|
model/analyzer.py
CHANGED
@@ -1,145 +1,146 @@
|
|
1 |
-
# analyzer.py
|
2 |
-
# model > analyzer.py
|
3 |
-
|
4 |
import os
|
5 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
6 |
import torch
|
7 |
from datetime import datetime
|
8 |
import gradio as gr
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
tokenizer =
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
"
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
"
|
45 |
-
|
46 |
-
"
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
"
|
64 |
-
"
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
"
|
69 |
-
|
70 |
-
"
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
"
|
94 |
-
"
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
"Self-Harm": {
|
99 |
-
"mapped_name": "Self-Harm",
|
100 |
-
"description": (
|
101 |
-
"Any mention or depiction of behaviors where an individual intentionally causes harm to themselves. This includes cutting, burning, or other forms of physical injury, "
|
102 |
-
"as well as suicidal ideation, suicide attempts, or discussions of self-destructive thoughts and actions. References to scars, bruises, or other lasting signs of self-harm are also included."
|
103 |
-
)
|
104 |
-
},
|
105 |
-
"Gun Use": {
|
106 |
-
"mapped_name": "Gun Use",
|
107 |
-
"description": (
|
108 |
-
"Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner. This includes scenes of gun violence, references to shootings, "
|
109 |
-
"gun-related accidents, or the presence of firearms in a tense or dangerous context (e.g., holstered weapons during an argument)."
|
110 |
-
)
|
111 |
-
},
|
112 |
-
"Animal Cruelty": {
|
113 |
-
"mapped_name": "Animal Cruelty",
|
114 |
-
"description": (
|
115 |
-
"Any act of harm, abuse, or neglect toward animals, whether intentional or accidental. This includes physical abuse (e.g., hitting, injuring, or killing animals), "
|
116 |
-
"mental or emotional mistreatment (e.g., starvation, isolation), and scenes where animals are subjected to pain or suffering for human entertainment or experimentation."
|
117 |
-
)
|
118 |
-
},
|
119 |
-
"Mental Health Issues": {
|
120 |
-
"mapped_name": "Mental Health Issues",
|
121 |
-
"description": (
|
122 |
-
"Any reference to mental health struggles, disorders, or psychological distress. This includes mentions of depression, anxiety, PTSD, bipolar disorder, schizophrenia, "
|
123 |
-
"or other conditions. Scenes depicting therapy sessions, psychiatric treatment, or coping mechanisms (e.g., medication, journaling) are also included. May cover subtle hints "
|
124 |
-
"like a character expressing feelings of worthlessness, hopelessness, or detachment from reality."
|
125 |
-
)
|
126 |
}
|
127 |
-
}
|
128 |
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
-
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
mapped_name = info["mapped_name"]
|
140 |
description = info["description"]
|
141 |
|
142 |
-
print(f"\nAnalyzing for {mapped_name}...")
|
143 |
prompt = f"""
|
144 |
Check this text for any indication of {mapped_name} ({description}).
|
145 |
Be sensitive to subtle references or implications, make sure the text is not metaphorical.
|
@@ -148,81 +149,107 @@ def analyze_script(script):
|
|
148 |
Answer:
|
149 |
"""
|
150 |
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
return final_triggers
|
193 |
-
|
194 |
-
# Define the Gradio interface
|
195 |
-
def analyze_content(script):
|
196 |
-
# Perform the analysis on the input script using the analyze_script function
|
197 |
-
triggers = analyze_script(script)
|
198 |
-
|
199 |
-
# Define the result based on the triggers found
|
200 |
-
if isinstance(triggers, list) and triggers != ["None"]:
|
201 |
result = {
|
202 |
"detected_triggers": triggers,
|
203 |
-
"confidence": "High - Content detected",
|
204 |
-
"model": "Llama-3.2-1B",
|
205 |
-
"analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
206 |
-
}
|
207 |
-
else:
|
208 |
-
result = {
|
209 |
-
"detected_triggers": ["None"],
|
210 |
-
"confidence": "High - No concerning content detected",
|
211 |
"model": "Llama-3.2-1B",
|
212 |
"analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
213 |
}
|
214 |
|
215 |
-
|
216 |
-
return result
|
217 |
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
)
|
|
|
226 |
|
227 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
iface.launch()
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
3 |
import torch
|
4 |
from datetime import datetime
|
5 |
import gradio as gr
|
6 |
+
from typing import Dict, List, Union, Optional
|
7 |
+
import logging
|
8 |
+
|
9 |
+
# Module-wide logging setup so model-loading and analysis errors are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
12 |
+
|
13 |
+
class ContentAnalyzer:
|
14 |
+
def __init__(self):
|
15 |
+
self.hf_token = os.getenv("HF_TOKEN")
|
16 |
+
if not self.hf_token:
|
17 |
+
raise ValueError("HF_TOKEN environment variable is not set!")
|
18 |
+
|
19 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
20 |
+
self.model = None
|
21 |
+
self.tokenizer = None
|
22 |
+
self.trigger_categories = self._init_trigger_categories()
|
23 |
+
|
24 |
+
def _init_trigger_categories(self) -> Dict:
|
25 |
+
"""Initialize trigger categories with their descriptions."""
|
26 |
+
return {
|
27 |
+
"Violence": {
|
28 |
+
"mapped_name": "Violence",
|
29 |
+
"description": (
|
30 |
+
"Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
|
31 |
+
"Includes direct physical confrontations, implied violence, or large-scale events like wars, riots, or violent protests."
|
32 |
+
)
|
33 |
+
},
|
34 |
+
"Death": {
|
35 |
+
"mapped_name": "Death References",
|
36 |
+
"description": (
|
37 |
+
"Any mention, implication, or depiction of the loss of life, including direct deaths of characters, mentions of deceased individuals, "
|
38 |
+
"or abstract references to mortality. This covers depictions of funerals, mourning, or death-centered dialogue."
|
39 |
+
)
|
40 |
+
},
|
41 |
+
"Substance Use": {
|
42 |
+
"mapped_name": "Substance Use",
|
43 |
+
"description": (
|
44 |
+
"Any explicit or implied reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances. "
|
45 |
+
"Includes scenes of drinking, smoking, drug use, withdrawal symptoms, or rehabilitation."
|
46 |
+
)
|
47 |
+
},
|
48 |
+
"Gore": {
|
49 |
+
"mapped_name": "Gore",
|
50 |
+
"description": (
|
51 |
+
"Extremely detailed and graphic depictions of severe physical injuries, mutilation, or extreme bodily harm, including heavy blood, "
|
52 |
+
"exposed organs, or dismemberment."
|
53 |
+
)
|
54 |
+
},
|
55 |
+
"Vomit": {
|
56 |
+
"mapped_name": "Vomit",
|
57 |
+
"description": "Any reference to the act of vomiting, whether directly described, implied, or depicted in detail."
|
58 |
+
},
|
59 |
+
"Sexual Content": {
|
60 |
+
"mapped_name": "Sexual Content",
|
61 |
+
"description": (
|
62 |
+
"Any depiction or mention of sexual activity, intimacy, or sexual behavior, from implied scenes to explicit descriptions."
|
63 |
+
)
|
64 |
+
},
|
65 |
+
"Sexual Abuse": {
|
66 |
+
"mapped_name": "Sexual Abuse",
|
67 |
+
"description": (
|
68 |
+
"Any form of non-consensual sexual act, behavior, or interaction, involving coercion, manipulation, or physical force."
|
69 |
+
)
|
70 |
+
},
|
71 |
+
"Self-Harm": {
|
72 |
+
"mapped_name": "Self-Harm",
|
73 |
+
"description": (
|
74 |
+
"Any mention or depiction of behaviors where an individual intentionally causes harm to themselves, including suicidal thoughts."
|
75 |
+
)
|
76 |
+
},
|
77 |
+
"Gun Use": {
|
78 |
+
"mapped_name": "Gun Use",
|
79 |
+
"description": (
|
80 |
+
"Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner."
|
81 |
+
)
|
82 |
+
},
|
83 |
+
"Animal Cruelty": {
|
84 |
+
"mapped_name": "Animal Cruelty",
|
85 |
+
"description": (
|
86 |
+
"Any act of harm, abuse, or neglect toward animals, whether intentional or accidental."
|
87 |
+
)
|
88 |
+
},
|
89 |
+
"Mental Health Issues": {
|
90 |
+
"mapped_name": "Mental Health Issues",
|
91 |
+
"description": (
|
92 |
+
"Any reference to mental health struggles, disorders, or psychological distress, including therapy and treatment."
|
93 |
+
)
|
94 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
}
|
|
|
96 |
|
97 |
+
async def load_model(self, progress=None) -> None:
|
98 |
+
"""Load the model and tokenizer with progress updates."""
|
99 |
+
try:
|
100 |
+
if progress:
|
101 |
+
progress(0.1, "Loading tokenizer...")
|
102 |
+
|
103 |
+
self.tokenizer = AutoTokenizer.from_pretrained(
|
104 |
+
"meta-llama/Llama-3.2-1B",
|
105 |
+
use_fast=True
|
106 |
+
)
|
107 |
|
108 |
+
if progress:
|
109 |
+
progress(0.3, "Loading model...")
|
110 |
+
|
111 |
+
self.model = AutoModelForCausalLM.from_pretrained(
|
112 |
+
"meta-llama/Llama-3.2-1B",
|
113 |
+
token=self.hf_token,
|
114 |
+
torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
|
115 |
+
device_map="auto"
|
116 |
+
)
|
117 |
|
118 |
+
if progress:
|
119 |
+
progress(0.5, "Model loaded successfully")
|
120 |
+
|
121 |
+
logger.info(f"Model loaded successfully on {self.device}")
|
122 |
+
except Exception as e:
|
123 |
+
logger.error(f"Error loading model: {str(e)}")
|
124 |
+
raise
|
125 |
+
|
126 |
+
def _chunk_text(self, text: str, chunk_size: int = 256, overlap: int = 15) -> List[str]:
|
127 |
+
"""Split text into overlapping chunks for processing."""
|
128 |
+
return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size - overlap)]
|
129 |
+
|
130 |
+
async def analyze_chunk(
|
131 |
+
self,
|
132 |
+
chunk: str,
|
133 |
+
progress: Optional[gr.Progress] = None,
|
134 |
+
current_progress: float = 0,
|
135 |
+
progress_step: float = 0
|
136 |
+
) -> Dict[str, float]:
|
137 |
+
"""Analyze a single chunk of text for triggers."""
|
138 |
+
chunk_triggers = {}
|
139 |
+
|
140 |
+
for category, info in self.trigger_categories.items():
|
141 |
mapped_name = info["mapped_name"]
|
142 |
description = info["description"]
|
143 |
|
|
|
144 |
prompt = f"""
|
145 |
Check this text for any indication of {mapped_name} ({description}).
|
146 |
Be sensitive to subtle references or implications, make sure the text is not metaphorical.
|
|
|
149 |
Answer:
|
150 |
"""
|
151 |
|
152 |
+
try:
|
153 |
+
inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
|
154 |
+
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
155 |
+
|
156 |
+
with torch.no_grad():
|
157 |
+
outputs = self.model.generate(
|
158 |
+
**inputs,
|
159 |
+
max_new_tokens=3,
|
160 |
+
do_sample=True,
|
161 |
+
temperature=0.7,
|
162 |
+
top_p=0.8,
|
163 |
+
pad_token_id=self.tokenizer.eos_token_id
|
164 |
+
)
|
165 |
+
|
166 |
+
response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper()
|
167 |
+
first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO"
|
168 |
+
|
169 |
+
if first_word == "YES":
|
170 |
+
chunk_triggers[mapped_name] = chunk_triggers.get(mapped_name, 0) + 1
|
171 |
+
elif first_word == "MAYBE":
|
172 |
+
chunk_triggers[mapped_name] = chunk_triggers.get(mapped_name, 0) + 0.5
|
173 |
+
|
174 |
+
if progress:
|
175 |
+
current_progress += progress_step
|
176 |
+
progress(min(current_progress, 0.9), f"Analyzing {mapped_name}...")
|
177 |
+
|
178 |
+
except Exception as e:
|
179 |
+
logger.error(f"Error analyzing chunk for {mapped_name}: {str(e)}")
|
180 |
+
|
181 |
+
return chunk_triggers
|
182 |
+
|
183 |
+
async def analyze_script(self, script: str, progress: Optional[gr.Progress] = None) -> List[str]:
|
184 |
+
"""Analyze the entire script for triggers with progress updates."""
|
185 |
+
if not self.model or not self.tokenizer:
|
186 |
+
await self.load_model(progress)
|
187 |
+
|
188 |
+
chunks = self._chunk_text(script)
|
189 |
+
identified_triggers = {}
|
190 |
+
progress_step = 0.4 / (len(chunks) * len(self.trigger_categories))
|
191 |
+
current_progress = 0.5 # Starting after model loading
|
192 |
+
|
193 |
+
for chunk_idx, chunk in enumerate(chunks, 1):
|
194 |
+
chunk_triggers = await self.analyze_chunk(
|
195 |
+
chunk,
|
196 |
+
progress,
|
197 |
+
current_progress,
|
198 |
+
progress_step
|
199 |
+
)
|
200 |
|
201 |
+
for trigger, count in chunk_triggers.items():
|
202 |
+
identified_triggers[trigger] = identified_triggers.get(trigger, 0) + count
|
203 |
+
|
204 |
+
if progress:
|
205 |
+
progress(0.95, "Finalizing results...")
|
206 |
+
|
207 |
+
final_triggers = [
|
208 |
+
trigger for trigger, count in identified_triggers.items()
|
209 |
+
if count > 0.5
|
210 |
+
]
|
211 |
+
|
212 |
+
return final_triggers if final_triggers else ["None"]
|
213 |
+
|
214 |
+
async def analyze_content(
    script: str,
    progress: Optional[gr.Progress] = None
) -> Dict[str, Union[List[str], str]]:
    """Main analysis entry point for the Gradio interface.

    Returns a dict with detected triggers, a confidence note, the model
    name and a timestamp; on failure, an error payload of the same shape
    (plus an "error" key).
    """
    # PERF FIX: reuse one ContentAnalyzer across calls instead of
    # re-instantiating (and re-downloading/re-loading the 1B model) on
    # every request. The instance is cached on the function object so
    # this edit is self-contained.
    analyzer = getattr(analyze_content, "_analyzer", None)
    if analyzer is None:
        analyzer = ContentAnalyzer()
        analyze_content._analyzer = analyzer

    try:
        triggers = await analyzer.analyze_script(script, progress)

        if progress:
            progress(1.0, "Analysis complete!")

        result = {
            "detected_triggers": triggers,
            "confidence": "High - Content detected" if triggers != ["None"] else "High - No concerning content detected",
            "model": "Llama-3.2-1B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        return result

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return {
            "detected_triggers": ["Error occurred during analysis"],
            "confidence": "Error",
            "model": "Llama-3.2-1B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(e)
        }
|
245 |
|
246 |
if __name__ == "__main__":
    # Standalone entry point, mainly for testing the analyzer directly.
    demo_interface = gr.Interface(
        fn=analyze_content,
        inputs=gr.Textbox(lines=8, label="Input Text"),
        outputs=gr.JSON(),
        title="Content Analysis",
        description="Analyze text content for sensitive topics",
    )
    demo_interface.launch()
|