Kuberwastaken committed on
Commit
83c0537
·
1 Parent(s): b4a4d6e

Improved functionality

Browse files
Files changed (2) hide show
  1. gradio_app.py +99 -134
  2. model/analyzer.py +224 -197
gradio_app.py CHANGED
@@ -1,172 +1,137 @@
 
1
  import gradio as gr
2
- import time
3
  from model.analyzer import analyze_content
 
4
 
5
- # Custom CSS for the interface
6
- css = """
7
- @import url('https://fonts.googleapis.com/css2?family=Nunito:wght@400;700&display=swap');
8
-
9
- #treat-container {
10
- background: linear-gradient(135deg, #ffE6F0 0%, #E6F0FF 100%);
11
- padding: 2rem;
12
- border-radius: 20px;
13
- box-shadow: 0 8px 32px rgba(0,0,0,0.1);
14
- font-family: 'Nunito', sans-serif;
15
  }
16
 
17
- #treat-title {
18
- text-align: center;
19
- color: #FF69B4;
20
- font-size: 3.5rem;
21
  font-weight: bold;
22
- margin-bottom: 0.5rem;
23
- text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
24
  }
25
 
26
- #treat-subtitle {
27
- text-align: center;
28
- color: #666;
29
- font-size: 1.2rem;
30
- margin-bottom: 2rem;
31
  }
32
 
33
- #treat-subtitle span {
34
- color: #FF69B4;
35
  font-weight: bold;
36
  }
37
 
38
  .content-box {
39
- background: rgba(255,255,255,0.9);
40
  border-radius: 15px;
41
- border: 2px solid #FFB6C1;
42
- padding: 1rem;
 
43
  }
44
 
45
- .analyze-button {
46
- background: linear-gradient(45deg, #FF69B4, #87CEEB) !important;
47
- border: none !important;
48
- border-radius: 25px !important;
49
- color: white !important;
50
- font-family: 'Nunito', sans-serif !important;
51
- font-weight: bold !important;
52
- padding: 0.8rem 2rem !important;
53
- transition: transform 0.2s !important;
54
  }
55
 
56
- .analyze-button:hover {
57
- transform: translateY(-2px) !important;
58
  }
59
 
60
- .results-container {
61
- background: rgba(255,255,255,0.9);
62
- border-radius: 15px;
63
- padding: 1.5rem;
64
- margin-top: 1rem;
65
- border: 2px solid #87CEEB;
 
66
  }
67
 
68
- #loading-bar {
69
- height: 6px;
70
- background: linear-gradient(90deg, #FF69B4, #87CEEB);
71
- border-radius: 3px;
72
- transition: width 0.3s ease;
73
  }
74
  """
75
 
76
- def analyze_with_loading(text):
77
- # Simulate loading progress (you can integrate this with your actual analysis)
78
- for i in range(100):
79
- time.sleep(0.02) # Simulate processing time
80
- yield {"progress": i + 1}
 
 
 
81
 
82
- # Perform the actual analysis
 
83
  result = analyze_content(text)
84
 
85
- # Format the results
86
- if result["detected_triggers"] == ["None"]:
87
- triggers_text = "No triggers detected"
88
- else:
89
- triggers_text = ", ".join(result["detected_triggers"])
90
 
91
- yield {
92
- "progress": 100,
93
- "result": f"""
94
- <div class='results-container'>
95
- <h3 style='color: #FF69B4; margin-bottom: 1rem;'>Analysis Results</h3>
96
- <p><strong>Triggers Detected:</strong> {triggers_text}</p>
97
- <p><strong>Confidence:</strong> {result['confidence']}</p>
98
- <p><strong>Analysis Time:</strong> {result['analysis_timestamp']}</p>
99
- </div>
100
- """
101
- }
102
 
103
- with gr.Blocks(css=css) as iface:
104
- with gr.Column(elem_id="treat-container"):
 
105
  gr.HTML("""
106
- <div id="treat-title">TREAT</div>
107
- <div id="treat-subtitle">
108
- <span>T</span>rigger <span>R</span>ecognition for
109
- <span>E</span>njoyable and <span>A</span>ppropriate
110
- <span>T</span>elevision
111
- </div>
112
  """)
113
-
114
- text_input = gr.Textbox(
115
- label="Enter your content for analysis",
116
- placeholder="Paste your script or content here...",
117
- lines=8,
118
- elem_classes=["content-box"]
119
- )
120
-
121
- analyze_btn = gr.Button(
122
- "🍬 Analyze Content",
123
- elem_classes=["analyze-button"]
124
- )
125
-
126
- progress = gr.Number(
127
- value=0,
128
- visible=False,
129
- elem_id="progress-value"
130
  )
131
-
132
- gr.HTML("""
133
- <div style="width: 100%; height: 6px; background: #eee; border-radius: 3px; margin: 1rem 0;">
134
- <div id="loading-bar" style="width: 0%"></div>
135
- </div>
136
- """)
137
-
138
- output = gr.HTML()
139
-
140
- # JavaScript for updating the loading bar
141
- gr.HTML("""
142
- <script>
143
- function updateLoadingBar(progress) {
144
- document.getElementById('loading-bar').style.width = progress + '%';
145
- }
146
-
147
- // Watch for changes to the progress value
148
- const observer = new MutationObserver((mutations) => {
149
- mutations.forEach((mutation) => {
150
- if (mutation.type === 'attributes' && mutation.attributeName === 'value') {
151
- const progress = document.getElementById('progress-value').value;
152
- updateLoadingBar(progress);
153
- }
154
- });
155
- });
156
-
157
- // Start observing the progress value element
158
- observer.observe(document.getElementById('progress-value'), {
159
- attributes: true
160
- });
161
- </script>
162
- """)
163
-
164
- analyze_btn.click(
165
- fn=analyze_with_loading,
166
- inputs=[text_input],
167
- outputs=[gr.State({"progress": 0, "result": ""}), output],
168
- show_progress=False
169
  )
 
 
 
 
 
 
 
 
170
 
171
  if __name__ == "__main__":
172
  iface.launch()
 
1
+ # gradio_app.py
2
  import gradio as gr
 
3
  from model.analyzer import analyze_content
4
+ import time
5
 
6
# Custom CSS injected into gr.Blocks(css=custom_css): styles the TREAT title
# card, the content/result boxes, the page gradient, and the analyze button.
custom_css = """
.treat-title {
    text-align: center;
    padding: 20px;
    margin-bottom: 20px;
    background: linear-gradient(135deg, #fce4ec 0%, #e3f2fd 100%);
    border-radius: 15px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.treat-title h1 {
    font-size: 3em;
    color: #d81b60;
    margin-bottom: 10px;
    font-weight: bold;
}

.treat-title p {
    font-size: 1.2em;
    color: #5c6bc0;
}

.highlight {
    color: #d81b60;
    font-weight: bold;
}

.content-box {
    background: rgba(255, 255, 255, 0.9);
    border-radius: 15px;
    padding: 20px;
    margin: 20px 0;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.results-box {
    background: rgba(255, 255, 255, 0.9);
    border-radius: 15px;
    padding: 20px;
    margin-top: 20px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.gradio-container {
    background: linear-gradient(135deg, #fce4ec 0%, #e3f2fd 100%);
}

.analyze-button {
    background-color: #d81b60 !important;
    color: white !important;
    border-radius: 25px !important;
    padding: 10px 20px !important;
    font-size: 1.1em !important;
    transition: transform 0.2s !important;
}

.analyze-button:hover {
    transform: scale(1.05) !important;
}
"""
67
 
68
def analyze_with_loading(text, progress=gr.Progress()):
    """Run the trigger analysis with a Gradio progress bar.

    Args:
        text: Raw script/content pasted by the user.
        progress: Gradio progress tracker (the default instance is the
            documented Gradio idiom; Gradio injects tracking at call time).

    Returns:
        A human-readable summary string listing detected triggers.
    """
    import asyncio  # local import: only needed to drive the async analyzer

    progress(0, desc="Starting analysis...")

    # Simulated model-preparation phase so the bar moves immediately.
    for i in range(30):
        time.sleep(0.1)  # Reduced sleep time for better UX
        progress((i + 1) / 100)

    # Perform actual analysis.
    progress(0.3, desc="Processing text...")
    result = analyze_content(text)
    # BUG FIX: analyze_content is an async coroutine function in
    # model/analyzer.py, so calling it bare returns a coroutine object and
    # the dict lookups below raised TypeError. Drive it to completion here
    # (kept conditional so a synchronous implementation also still works).
    if asyncio.iscoroutine(result):
        result = asyncio.run(result)

    # Simulated final-processing phase.
    for i in range(70, 100):
        time.sleep(0.05)  # Reduced sleep time
        progress((i + 1) / 100)

    # Format the results for display.
    triggers = result["detected_triggers"]
    if triggers == ["None"]:
        return "No triggers detected in the content."
    trigger_list = "\n".join(f"• {trigger}" for trigger in triggers)
    return f"Triggers Detected:\n{trigger_list}"
 
 
 
 
93
 
94
# Build the TREAT UI: title card, input box, analyze button, results box.
# NOTE(review): gr.Box exists in Gradio 3.x but was removed in 4.x — if the
# app is upgraded, replace it with gr.Group or gr.Column.
with gr.Blocks(css=custom_css) as iface:
    # Title section
    with gr.Box(elem_classes="treat-title"):
        gr.HTML("""
            <h1>TREAT</h1>
            <p><span class="highlight">T</span>rigger
            <span class="highlight">R</span>ecognition for
            <span class="highlight">E</span>njoyable and
            <span class="highlight">A</span>ppropriate
            <span class="highlight">T</span>elevision</p>
        """)

    # Content input section
    with gr.Box(elem_classes="content-box"):
        input_text = gr.Textbox(
            label="Content to Analyze",
            placeholder="Paste your content here...",
            lines=8
        )

    # Analysis button
    analyze_btn = gr.Button(
        "Analyze Content",
        elem_classes="analyze-button"
    )

    # Results section
    with gr.Box(elem_classes="results-box"):
        output_text = gr.Textbox(
            label="Analysis Results",
            lines=5,
            # BUG FIX: gr.Textbox has no `readonly` parameter — passing it
            # raised TypeError at build time. `interactive=False` is the
            # supported way to make an output textbox non-editable.
            interactive=False
        )

    # Wire the button to the analysis function.
    analyze_btn.click(
        fn=analyze_with_loading,
        inputs=[input_text],
        outputs=[output_text],
        api_name="analyze"
    )
135
 
136
# Entry point: launch the Gradio app only when executed directly (not on import).
if __name__ == "__main__":
    iface.launch()
model/analyzer.py CHANGED
@@ -1,145 +1,146 @@
1
- # analyzer.py
2
- # model > analyzer.py
3
-
4
  import os
5
  from transformers import AutoTokenizer, AutoModelForCausalLM
6
  import torch
7
  from datetime import datetime
8
  import gradio as gr
9
-
10
- # Fetch the Hugging Face token from the environment variable (secrets)
11
- hf_token = os.getenv("HF_TOKEN")
12
-
13
- if not hf_token:
14
- raise ValueError("HF_TOKEN environment variable is not set!")
15
-
16
- def analyze_script(script):
17
- # Starting the script analysis
18
- print("\n=== Starting Analysis ===")
19
- print(f"Time: {datetime.now()}") # Outputting the current timestamp
20
- print("Loading model and tokenizer...")
21
-
22
- try:
23
- # Load the tokenizer and model, selecting the appropriate device (CPU or CUDA)
24
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B", use_fast=True)
25
- device = "cuda" if torch.cuda.is_available() else "cpu" # Use CUDA if available, else use CPU
26
- print(f"Using device: {device}")
27
-
28
- # Load model with token authentication
29
- model = AutoModelForCausalLM.from_pretrained(
30
- "meta-llama/Llama-3.2-1B",
31
- token=hf_token, # Pass the token to authenticate
32
- torch_dtype=torch.float16 if device == "cuda" else torch.float32, # Use 16-bit precision for CUDA, 32-bit for CPU
33
- device_map="auto" # Automatically map model to available device
34
- )
35
- print("Model loaded successfully")
36
-
37
- except Exception as e:
38
- print(f"An error occurred: {e}")
39
- return []
40
-
41
- # Define trigger categories with their descriptions
42
- trigger_categories = {
43
- "Violence": {
44
- "mapped_name": "Violence",
45
- "description": (
46
- "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
47
- "Includes direct physical confrontations (e.g., fights, beatings, or assaults), implied violence (e.g., very graphical threats or descriptions of injuries), "
48
- "or large-scale events like wars, riots, or violent protests."
49
- )
50
- },
51
- "Death": {
52
- "mapped_name": "Death References",
53
- "description": (
54
- "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, including mentions of deceased individuals, "
55
- "or abstract references to mortality (e.g., 'facing the end' or 'gone forever'). This also covers depictions of funerals, mourning, "
56
- "grieving, or any dialogue that centers around death, do not take metaphors into context that don't actually lead to death."
57
- )
58
- },
59
- "Substance Use": {
60
- "mapped_name": "Substance Use",
61
- "description": (
62
- "Any explicit or implied reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances. "
63
- "Includes scenes of drinking, smoking, or drug use, whether recreational or addictive. May also cover references to withdrawal symptoms, "
64
- "rehabilitation, or substance-related paraphernalia (e.g., needles, bottles, pipes)."
65
- )
66
- },
67
- "Gore": {
68
- "mapped_name": "Gore",
69
- "description": (
70
- "Extremely detailed and graphic depictions of highly severe physical injuries, mutilation, or extreme bodily harm, often accompanied by descriptions of heavy blood, exposed organs, "
71
- "or dismemberment. This includes war scenes with severe casualties, horror scenarios involving grotesque creatures, or medical procedures depicted with excessive detail."
72
- )
73
- },
74
- "Vomit": {
75
- "mapped_name": "Vomit",
76
- "description": (
77
- "Any reference to the act of vomiting, whether directly described, implied, or depicted in detail. This includes sounds or visual descriptions of the act, "
78
- "mentions of nausea leading to vomiting, or its aftermath (e.g., the presence of vomit, cleaning it up, or characters reacting to it)."
79
- )
80
- },
81
- "Sexual Content": {
82
- "mapped_name": "Sexual Content",
83
- "description": (
84
- "Any depiction or mention of sexual activity, intimacy, or sexual behavior, ranging from implied scenes to explicit descriptions. "
85
- "This includes romantic encounters, physical descriptions of characters in a sexual context, sexual dialogue, or references to sexual themes (e.g., harassment, innuendos)."
86
- )
87
- },
88
- "Sexual Abuse": {
89
- "mapped_name": "Sexual Abuse",
90
- "description": (
91
- "Any form of non-consensual sexual act, behavior, or interaction, involving coercion, manipulation, or physical force. "
92
- "This includes incidents of sexual assault, molestation, exploitation, harassment, and any acts where an individual is subjected to sexual acts against their will or without their consent. "
93
- "It also covers discussions or depictions of the aftermath of such abuse, such as trauma, emotional distress, legal proceedings, or therapy. "
94
- "References to inappropriate sexual advances, groping, or any other form of sexual misconduct are also included, as well as the psychological and emotional impact on survivors. "
95
- "Scenes where individuals are placed in sexually compromising situations, even if not directly acted upon, may also fall under this category."
96
- )
97
- },
98
- "Self-Harm": {
99
- "mapped_name": "Self-Harm",
100
- "description": (
101
- "Any mention or depiction of behaviors where an individual intentionally causes harm to themselves. This includes cutting, burning, or other forms of physical injury, "
102
- "as well as suicidal ideation, suicide attempts, or discussions of self-destructive thoughts and actions. References to scars, bruises, or other lasting signs of self-harm are also included."
103
- )
104
- },
105
- "Gun Use": {
106
- "mapped_name": "Gun Use",
107
- "description": (
108
- "Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner. This includes scenes of gun violence, references to shootings, "
109
- "gun-related accidents, or the presence of firearms in a tense or dangerous context (e.g., holstered weapons during an argument)."
110
- )
111
- },
112
- "Animal Cruelty": {
113
- "mapped_name": "Animal Cruelty",
114
- "description": (
115
- "Any act of harm, abuse, or neglect toward animals, whether intentional or accidental. This includes physical abuse (e.g., hitting, injuring, or killing animals), "
116
- "mental or emotional mistreatment (e.g., starvation, isolation), and scenes where animals are subjected to pain or suffering for human entertainment or experimentation."
117
- )
118
- },
119
- "Mental Health Issues": {
120
- "mapped_name": "Mental Health Issues",
121
- "description": (
122
- "Any reference to mental health struggles, disorders, or psychological distress. This includes mentions of depression, anxiety, PTSD, bipolar disorder, schizophrenia, "
123
- "or other conditions. Scenes depicting therapy sessions, psychiatric treatment, or coping mechanisms (e.g., medication, journaling) are also included. May cover subtle hints "
124
- "like a character expressing feelings of worthlessness, hopelessness, or detachment from reality."
125
- )
126
  }
127
- }
128
 
129
- print("\nProcessing text...") # Output indicating the text is being processed
130
- chunk_size = 256 # Set the chunk size for text processing
131
- overlap = 15 # Overlap between chunks for context preservation
132
- script_chunks = [script[i:i + chunk_size] for i in range(0, len(script), chunk_size - overlap)]
 
 
 
 
 
 
133
 
134
- identified_triggers = {}
 
 
 
 
 
 
 
 
135
 
136
- for chunk_idx, chunk in enumerate(script_chunks, 1):
137
- print(f"\n--- Processing Chunk {chunk_idx}/{len(script_chunks)} ---")
138
- for category, info in trigger_categories.items():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  mapped_name = info["mapped_name"]
140
  description = info["description"]
141
 
142
- print(f"\nAnalyzing for {mapped_name}...")
143
  prompt = f"""
144
  Check this text for any indication of {mapped_name} ({description}).
145
  Be sensitive to subtle references or implications, make sure the text is not metaphorical.
@@ -148,81 +149,107 @@ def analyze_script(script):
148
  Answer:
149
  """
150
 
151
- print("Sending prompt to model...") # Indicate that prompt is being sent to the model
152
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) # Tokenize the prompt
153
- inputs = {k: v.to(device) for k, v in inputs.items()} # Send inputs to the chosen device
154
-
155
- with torch.no_grad(): # Disable gradient calculation for inference
156
- print("Generating response...") # Indicate that the model is generating a response
157
- outputs = model.generate(
158
- **inputs,
159
- max_new_tokens=3, # Limit response length
160
- do_sample=True, # Enable sampling for more diverse output
161
- temperature=0.7, # Control randomness of the output
162
- top_p=0.8, # Use nucleus sampling
163
- pad_token_id=tokenizer.eos_token_id # Pad token ID
164
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
- response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper() # Decode and format the response
167
- first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO" # Get the first word of the response
168
- print(f"Model response for {mapped_name}: {first_word}")
169
-
170
- # Update identified triggers based on model response
171
- if first_word == "YES":
172
- print(f"Detected {mapped_name} in this chunk!") # Trigger detected
173
- identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 1
174
- elif first_word == "MAYBE":
175
- print(f"Possible {mapped_name} detected, marking for further review.") # Possible trigger detected
176
- identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 0.5
177
- else:
178
- print(f"No {mapped_name} detected in this chunk.") # No trigger detected
179
-
180
- print("\n=== Analysis Complete ===") # Indicate that analysis is complete
181
- final_triggers = [] # List to store final triggers
182
-
183
- # Filter and output the final trigger results
184
- for mapped_name, count in identified_triggers.items():
185
- if count > 0.5:
186
- final_triggers.append(mapped_name)
187
- print(f"- {mapped_name}: found in {count} chunks")
188
-
189
- if not final_triggers:
190
- final_triggers = ["None"]
191
-
192
- return final_triggers
193
-
194
- # Define the Gradio interface
195
- def analyze_content(script):
196
- # Perform the analysis on the input script using the analyze_script function
197
- triggers = analyze_script(script)
198
-
199
- # Define the result based on the triggers found
200
- if isinstance(triggers, list) and triggers != ["None"]:
201
  result = {
202
  "detected_triggers": triggers,
203
- "confidence": "High - Content detected",
204
- "model": "Llama-3.2-1B",
205
- "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
206
- }
207
- else:
208
- result = {
209
- "detected_triggers": ["None"],
210
- "confidence": "High - No concerning content detected",
211
  "model": "Llama-3.2-1B",
212
  "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
213
  }
214
 
215
- print("\nFinal Result Dictionary:", result)
216
- return result
217
 
218
- # Create and launch the Gradio interface
219
- iface = gr.Interface(
220
- fn=analyze_content,
221
- inputs=gr.Textbox(lines=8, label="Input Text"),
222
- outputs=gr.JSON(),
223
- title="Content Analysis",
224
- description="Analyze text content for sensitive topics"
225
- )
 
226
 
227
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
228
  iface.launch()
 
 
 
 
1
  import os
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
  from datetime import datetime
5
  import gradio as gr
6
+ from typing import Dict, List, Union, Optional
7
+ import logging
8
+
9
# Configure logging.
# NOTE(review): basicConfig at import time configures the process-wide root
# logger — acceptable for an app entry point, surprising for a library;
# confirm this is intended.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
12
+
13
+ class ContentAnalyzer:
14
+ def __init__(self):
15
+ self.hf_token = os.getenv("HF_TOKEN")
16
+ if not self.hf_token:
17
+ raise ValueError("HF_TOKEN environment variable is not set!")
18
+
19
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
20
+ self.model = None
21
+ self.tokenizer = None
22
+ self.trigger_categories = self._init_trigger_categories()
23
+
24
    def _init_trigger_categories(self) -> Dict:
        """Initialize trigger categories with their descriptions.

        Returns:
            Dict: mapping of category key to {"mapped_name": display name used
            in results, "description": text interpolated into the LLM prompt}.
            The description strings are part of the model prompt, so changing
            their wording changes detection behavior.
        """
        return {
            "Violence": {
                "mapped_name": "Violence",
                "description": (
                    "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
                    "Includes direct physical confrontations, implied violence, or large-scale events like wars, riots, or violent protests."
                )
            },
            "Death": {
                "mapped_name": "Death References",
                "description": (
                    "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, mentions of deceased individuals, "
                    "or abstract references to mortality. This covers depictions of funerals, mourning, or death-centered dialogue."
                )
            },
            "Substance Use": {
                "mapped_name": "Substance Use",
                "description": (
                    "Any explicit or implied reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances. "
                    "Includes scenes of drinking, smoking, drug use, withdrawal symptoms, or rehabilitation."
                )
            },
            "Gore": {
                "mapped_name": "Gore",
                "description": (
                    "Extremely detailed and graphic depictions of severe physical injuries, mutilation, or extreme bodily harm, including heavy blood, "
                    "exposed organs, or dismemberment."
                )
            },
            "Vomit": {
                "mapped_name": "Vomit",
                "description": "Any reference to the act of vomiting, whether directly described, implied, or depicted in detail."
            },
            "Sexual Content": {
                "mapped_name": "Sexual Content",
                "description": (
                    "Any depiction or mention of sexual activity, intimacy, or sexual behavior, from implied scenes to explicit descriptions."
                )
            },
            "Sexual Abuse": {
                "mapped_name": "Sexual Abuse",
                "description": (
                    "Any form of non-consensual sexual act, behavior, or interaction, involving coercion, manipulation, or physical force."
                )
            },
            "Self-Harm": {
                "mapped_name": "Self-Harm",
                "description": (
                    "Any mention or depiction of behaviors where an individual intentionally causes harm to themselves, including suicidal thoughts."
                )
            },
            "Gun Use": {
                "mapped_name": "Gun Use",
                "description": (
                    "Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner."
                )
            },
            "Animal Cruelty": {
                "mapped_name": "Animal Cruelty",
                "description": (
                    "Any act of harm, abuse, or neglect toward animals, whether intentional or accidental."
                )
            },
            "Mental Health Issues": {
                "mapped_name": "Mental Health Issues",
                "description": (
                    "Any reference to mental health struggles, disorders, or psychological distress, including therapy and treatment."
                )
            }
        }
 
96
 
97
+ async def load_model(self, progress=None) -> None:
98
+ """Load the model and tokenizer with progress updates."""
99
+ try:
100
+ if progress:
101
+ progress(0.1, "Loading tokenizer...")
102
+
103
+ self.tokenizer = AutoTokenizer.from_pretrained(
104
+ "meta-llama/Llama-3.2-1B",
105
+ use_fast=True
106
+ )
107
 
108
+ if progress:
109
+ progress(0.3, "Loading model...")
110
+
111
+ self.model = AutoModelForCausalLM.from_pretrained(
112
+ "meta-llama/Llama-3.2-1B",
113
+ token=self.hf_token,
114
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
115
+ device_map="auto"
116
+ )
117
 
118
+ if progress:
119
+ progress(0.5, "Model loaded successfully")
120
+
121
+ logger.info(f"Model loaded successfully on {self.device}")
122
+ except Exception as e:
123
+ logger.error(f"Error loading model: {str(e)}")
124
+ raise
125
+
126
+ def _chunk_text(self, text: str, chunk_size: int = 256, overlap: int = 15) -> List[str]:
127
+ """Split text into overlapping chunks for processing."""
128
+ return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size - overlap)]
129
+
130
+ async def analyze_chunk(
131
+ self,
132
+ chunk: str,
133
+ progress: Optional[gr.Progress] = None,
134
+ current_progress: float = 0,
135
+ progress_step: float = 0
136
+ ) -> Dict[str, float]:
137
+ """Analyze a single chunk of text for triggers."""
138
+ chunk_triggers = {}
139
+
140
+ for category, info in self.trigger_categories.items():
141
  mapped_name = info["mapped_name"]
142
  description = info["description"]
143
 
 
144
  prompt = f"""
145
  Check this text for any indication of {mapped_name} ({description}).
146
  Be sensitive to subtle references or implications, make sure the text is not metaphorical.
 
149
  Answer:
150
  """
151
 
152
+ try:
153
+ inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
154
+ inputs = {k: v.to(self.device) for k, v in inputs.items()}
155
+
156
+ with torch.no_grad():
157
+ outputs = self.model.generate(
158
+ **inputs,
159
+ max_new_tokens=3,
160
+ do_sample=True,
161
+ temperature=0.7,
162
+ top_p=0.8,
163
+ pad_token_id=self.tokenizer.eos_token_id
164
+ )
165
+
166
+ response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper()
167
+ first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO"
168
+
169
+ if first_word == "YES":
170
+ chunk_triggers[mapped_name] = chunk_triggers.get(mapped_name, 0) + 1
171
+ elif first_word == "MAYBE":
172
+ chunk_triggers[mapped_name] = chunk_triggers.get(mapped_name, 0) + 0.5
173
+
174
+ if progress:
175
+ current_progress += progress_step
176
+ progress(min(current_progress, 0.9), f"Analyzing {mapped_name}...")
177
+
178
+ except Exception as e:
179
+ logger.error(f"Error analyzing chunk for {mapped_name}: {str(e)}")
180
+
181
+ return chunk_triggers
182
+
183
+ async def analyze_script(self, script: str, progress: Optional[gr.Progress] = None) -> List[str]:
184
+ """Analyze the entire script for triggers with progress updates."""
185
+ if not self.model or not self.tokenizer:
186
+ await self.load_model(progress)
187
+
188
+ chunks = self._chunk_text(script)
189
+ identified_triggers = {}
190
+ progress_step = 0.4 / (len(chunks) * len(self.trigger_categories))
191
+ current_progress = 0.5 # Starting after model loading
192
+
193
+ for chunk_idx, chunk in enumerate(chunks, 1):
194
+ chunk_triggers = await self.analyze_chunk(
195
+ chunk,
196
+ progress,
197
+ current_progress,
198
+ progress_step
199
+ )
200
 
201
+ for trigger, count in chunk_triggers.items():
202
+ identified_triggers[trigger] = identified_triggers.get(trigger, 0) + count
203
+
204
+ if progress:
205
+ progress(0.95, "Finalizing results...")
206
+
207
+ final_triggers = [
208
+ trigger for trigger, count in identified_triggers.items()
209
+ if count > 0.5
210
+ ]
211
+
212
+ return final_triggers if final_triggers else ["None"]
213
+
214
async def analyze_content(
    script: str,
    progress: Optional[gr.Progress] = None
) -> Dict[str, Union[List[str], str]]:
    """Main analysis function for the Gradio interface.

    Args:
        script: Text to analyze.
        progress: Optional Gradio progress callback.

    Returns:
        Result dict with keys "detected_triggers", "confidence", "model",
        "analysis_timestamp" (and "error" on failure). Never raises: any
        exception is logged and converted to an error-shaped result.
    """
    # PERF FIX: a fresh ContentAnalyzer per call discarded the loaded model
    # and tokenizer, forcing a full (multi-GB) reload on every request.
    # Cache one instance on the function object and reuse it across calls.
    analyzer = getattr(analyze_content, "_analyzer", None)
    if analyzer is None:
        analyzer = ContentAnalyzer()
        analyze_content._analyzer = analyzer

    try:
        triggers = await analyzer.analyze_script(script, progress)

        if progress:
            progress(1.0, "Analysis complete!")

        result = {
            "detected_triggers": triggers,
            "confidence": "High - Content detected" if triggers != ["None"] else "High - No concerning content detected",
            "model": "Llama-3.2-1B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        return result

    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return {
            "detected_triggers": ["Error occurred during analysis"],
            "confidence": "Error",
            "model": "Llama-3.2-1B",
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(e)
        }
245
 
246
if __name__ == "__main__":
    # This section is mainly for testing the analyzer directly:
    # it exposes analyze_content as a minimal standalone Gradio JSON
    # interface, separate from the full UI in gradio_app.py.
    # NOTE(review): analyze_content is an async function — Gradio accepts
    # async fn callables; confirm the installed Gradio version does.
    iface = gr.Interface(
        fn=analyze_content,
        inputs=gr.Textbox(lines=8, label="Input Text"),
        outputs=gr.JSON(),
        title="Content Analysis",
        description="Analyze text content for sensitive topics"
    )
    iface.launch()