invincible-jha committed on
Commit
1eb4ae1
·
verified ·
1 Parent(s): dd56228

Upload 3 files

Browse files
src/models/analyzer.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .model_manager import ModelManager
2
+ from .audio_processor import AudioProcessor
3
+ from typing import Dict
4
+
5
class Analyzer:
    """Run the audio -> transcript -> emotion / mental-health pipeline.

    The analyzer owns no models itself: it delegates audio loading and
    feature extraction to ``audio_processor`` and all model inference to
    ``model_manager``, then merges the text-based risk scores with the
    acoustic features.
    """

    # Blend weights used by ``_combine_analysis``: the text-model score
    # dominates, the acoustic cue nudges it.
    TEXT_WEIGHT = 0.7
    AUDIO_WEIGHT = 0.3

    def __init__(self, model_manager: "ModelManager", audio_processor: "AudioProcessor"):
        """Store the collaborators and load the models immediately.

        Args:
            model_manager: Object providing ``load_models``, ``transcribe``,
                ``analyze_emotions`` and ``analyze_mental_health``.
            audio_processor: Object providing ``process_audio``.

        Note:
            ``model_manager.load_models()`` is called here, so constructing
            an ``Analyzer`` performs the (potentially slow) model loading.
        """
        # Annotations are quoted so the class body does not need the sibling
        # modules to be resolvable at definition time.
        self.model_manager = model_manager
        self.audio_processor = audio_processor
        self.model_manager.load_models()

    def analyze(self, audio_path: str) -> Dict:
        """Analyze one audio file and return the combined report.

        Args:
            audio_path: Path handed unchanged to
                ``audio_processor.process_audio``.

        Returns:
            A dict with the keys ``'transcription'``, ``'emotions'``
            (``'scores'`` and ``'dominant_emotion'``),
            ``'mental_health_indicators'`` and ``'audio_features'``.
            ``'dominant_emotion'`` is ``None`` when the emotion model
            returned no scores.
        """
        waveform, features = self.audio_processor.process_audio(audio_path)
        transcription = self.model_manager.transcribe(waveform)

        emotions = self.model_manager.analyze_emotions(transcription)
        text_indicators = self.model_manager.analyze_mental_health(transcription)
        mental_health = self._combine_analysis(text_indicators, features)

        # ``max`` raises on an empty mapping, so guard it explicitly.
        dominant_emotion = max(emotions, key=emotions.get) if emotions else None

        return {
            'transcription': transcription,
            'emotions': {
                'scores': emotions,
                'dominant_emotion': dominant_emotion,
            },
            'mental_health_indicators': mental_health,
            'audio_features': features,
        }

    def _combine_analysis(self, mental_health: Dict, features: Dict) -> Dict:
        """Combine mental health analysis with audio features.

        Args:
            mental_health: Text-derived scores; must contain
                ``'depression_risk'`` and ``'anxiety_risk'``. Any other key
                is carried over untouched.
            features: Acoustic features; must contain
                ``features['energy']['mean']`` and
                ``features['pitch']['std']``. ``features['pitch']['mean']``
                is used when present.

        Returns:
            A new dict (the input is not modified) with the two risk scores
            re-weighted and a ``'confidence'`` entry added. Both risk scores
            are limited to the range [0, 1].

        The acoustic terms are normalised before blending:

        * energy is limited to [0, 1] and inverted (lower energy may
          indicate depression);
        * pitch variability is the standard deviation divided by the mean
          pitch when a positive mean is available, then limited to [0, 1].
          The raw standard deviation is a frequency spread, not a
          probability, and blending it directly would push the score far
          above 1.

        A non-finite acoustic value (for example NaN when no pitch could be
        estimated) leaves the corresponding text score unchanged.
        """
        energy_level = self._clamp_unit(features['energy']['mean'])
        low_energy = None if energy_level is None else 1.0 - energy_level
        pitch_variability = self._relative_pitch_spread(features['pitch'])

        combined = dict(mental_health)  # do not mutate the caller's dict
        combined['depression_risk'] = self._blend(
            mental_health['depression_risk'], low_energy
        )
        combined['anxiety_risk'] = self._blend(
            mental_health['anxiety_risk'], pitch_variability
        )

        # Placeholder confidence values carried over from the original
        # implementation ("example confidence scores"); they are constants,
        # not computed from the inputs.
        combined['confidence'] = {
            'depression': 0.8,
            'anxiety': 0.8,
            'stress': 0.7,
        }
        return combined

    @classmethod
    def _blend(cls, text_score, audio_score):
        """Weight a text score with an acoustic score, limited to [0, 1].

        Returns ``text_score`` unchanged when ``audio_score`` is ``None``.
        """
        if audio_score is None:
            return text_score
        blended = text_score * cls.TEXT_WEIGHT + audio_score * cls.AUDIO_WEIGHT
        return min(1.0, max(0.0, blended))

    @classmethod
    def _relative_pitch_spread(cls, pitch: Dict):
        """Return pitch std relative to mean pitch, in [0, 1], or ``None``."""
        import math  # local import: the module header only imports typing names

        spread = pitch['std']
        if spread is None or not math.isfinite(spread):
            return None
        center = pitch.get('mean')
        if center is not None and math.isfinite(center) and center > 0:
            spread = spread / center
        return cls._clamp_unit(spread)

    @staticmethod
    def _clamp_unit(value):
        """Limit ``value`` to [0, 1]; return ``None`` if it is not finite."""
        import math  # local import: the module header only imports typing names

        if value is None or not math.isfinite(value):
            return None
        return min(1.0, max(0.0, float(value)))
src/models/audio-processor.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ import numpy as np
3
+ from typing import Dict, Tuple
4
+
5
class AudioProcessor:
    """Load an audio file and extract summary acoustic features.

    The features are MFCC means, pitch statistics and RMS-energy statistics,
    all computed with librosa at ``self.sample_rate``.
    """

    def __init__(self, sample_rate: int = 16000, n_mfcc: int = 13, n_mels: int = 128):
        """Configure the extractor.

        Args:
            sample_rate: Rate passed to ``librosa.load`` and to the feature
                extractors.
            n_mfcc: Number of MFCC coefficients to compute.
            n_mels: Stored for callers; not used by any method in this class.
        """
        self.sample_rate = sample_rate
        self.n_mfcc = n_mfcc
        self.n_mels = n_mels

    def process_audio(self, audio_path: str) -> Tuple[np.ndarray, Dict]:
        """Load ``audio_path`` and return ``(waveform, features)``.

        Returns:
            The waveform as loaded by ``librosa.load`` with
            ``sr=self.sample_rate``, and a dict with the keys ``'mfcc'``,
            ``'pitch'`` and ``'energy'``.
        """
        # The rate returned by librosa equals the requested one, so it is
        # deliberately discarded.
        waveform, _ = librosa.load(audio_path, sr=self.sample_rate)

        features = {
            'mfcc': self._extract_mfcc(waveform),
            'pitch': self._extract_pitch(waveform),
            'energy': self._extract_energy(waveform),
        }
        return waveform, features

    def _extract_mfcc(self, waveform: np.ndarray) -> np.ndarray:
        """Return the per-coefficient mean of the MFCC matrix."""
        mfccs = librosa.feature.mfcc(
            y=waveform,
            sr=self.sample_rate,
            n_mfcc=self.n_mfcc,
        )
        return mfccs.mean(axis=1)

    def _extract_pitch(self, waveform: np.ndarray) -> Dict:
        """Return mean/std/max/min of the pYIN fundamental-frequency track.

        ``librosa.pyin`` reports NaN for frames it considers unvoiced; those
        frames are excluded. If no frame is voiced (silence, noise) every
        statistic is ``0.0`` instead of NaN.
        """
        f0, _voiced_flag, _voiced_probs = librosa.pyin(
            waveform,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=self.sample_rate,
        )
        return self._summarize(f0)

    def _extract_energy(self, waveform: np.ndarray) -> Dict:
        """Return mean/std/max/min of the frame-wise RMS energy."""
        rms = librosa.feature.rms(y=waveform)[0]
        return self._summarize(rms)

    @staticmethod
    def _summarize(values: np.ndarray) -> Dict:
        """Summarise the finite entries of ``values`` as plain floats.

        Returns a dict with ``'mean'``, ``'std'``, ``'max'`` and ``'min'``.
        Non-finite entries are ignored; when nothing finite remains all four
        statistics are ``0.0``, which avoids NaN results and the
        RuntimeWarnings that NumPy's ``nan*`` reductions emit on all-NaN
        input.
        """
        data = np.asarray(values, dtype=float)
        finite = data[np.isfinite(data)]
        if finite.size == 0:
            return {'mean': 0.0, 'std': 0.0, 'max': 0.0, 'min': 0.0}
        return {
            'mean': float(finite.mean()),
            'std': float(finite.std()),
            'max': float(finite.max()),
            'min': float(finite.min()),
        }
src/models/model-manager.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import (
2
+ WhisperProcessor, WhisperForConditionalGeneration,
3
+ AutoModelForSequenceClassification, AutoTokenizer
4
+ )
5
+ import torch
6
+
7
class ModelManager:
    """Load and run the speech, emotion and clinical-text models.

    Models, tokenizers and processors are kept in dicts keyed by role
    (``'whisper'``, ``'emotion'``, ``'clinical'``). ``load_models`` must be
    called before any inference method.
    """

    WHISPER_CHECKPOINT = "openai/whisper-base"
    EMOTION_CHECKPOINT = "arpanghoshal/EmoRoBERTa"
    CLINICAL_CHECKPOINT = "emilyalsentzer/Bio_ClinicalBERT"

    # Output names of ``analyze_mental_health``, in logit order.
    MENTAL_HEALTH_LABELS = ('depression_risk', 'anxiety_risk', 'stress_level')

    def __init__(self):
        """Pick CUDA when available (else CPU) and create empty registries."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.models = {}
        self.tokenizers = {}
        self.processors = {}

    def load_models(self):
        """Download/load all three checkpoints and move them to ``self.device``."""
        # Whisper for speech recognition
        self.processors['whisper'] = WhisperProcessor.from_pretrained(
            self.WHISPER_CHECKPOINT
        )
        self.models['whisper'] = WhisperForConditionalGeneration.from_pretrained(
            self.WHISPER_CHECKPOINT
        ).to(self.device)

        # EmoRoBERTa for emotion detection
        self.tokenizers['emotion'] = AutoTokenizer.from_pretrained(
            self.EMOTION_CHECKPOINT
        )
        self.models['emotion'] = AutoModelForSequenceClassification.from_pretrained(
            self.EMOTION_CHECKPOINT
        ).to(self.device)

        # ClinicalBERT for mental-health indicators.
        # ``num_labels`` must match the three scores read in
        # ``analyze_mental_health``; the library default of 2 made index 2
        # raise IndexError.
        # NOTE(review): this checkpoint is a pretrained encoder, so the
        # classification head is presumably freshly initialised and its
        # scores are not meaningful until the model is fine-tuned — confirm.
        self.tokenizers['clinical'] = AutoTokenizer.from_pretrained(
            self.CLINICAL_CHECKPOINT
        )
        self.models['clinical'] = AutoModelForSequenceClassification.from_pretrained(
            self.CLINICAL_CHECKPOINT,
            num_labels=len(self.MENTAL_HEALTH_LABELS),
        ).to(self.device)

    def transcribe(self, audio_input, sampling_rate: int = 16000):
        """Transcribe a waveform with Whisper and return the text.

        Args:
            audio_input: Raw audio samples accepted by the Whisper processor.
            sampling_rate: Sample rate of ``audio_input``. It is passed to
                the processor explicitly so a mismatch with the rate the
                model expects is reported instead of silently producing bad
                features.

        Returns:
            The decoded text of the first (only) sequence.
        """
        processor = self.processors['whisper']
        input_features = processor(
            audio_input,
            sampling_rate=sampling_rate,
            return_tensors="pt",
        ).input_features.to(self.device)

        generated_ids = self.models['whisper'].generate(input_features)
        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
        return decoded[0]

    def analyze_emotions(self, text):
        """Return ``{emotion label: probability}`` for ``text``.

        Labels come from the loaded model's ``config.id2label``, so every
        output class is reported under its own name. The previous hard-coded
        six-name list did not come from the model, so it could truncate and
        mislabel the outputs.
        """
        model = self.models['emotion']
        inputs = self.tokenizers['emotion'](
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        ).to(self.device)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            logits = model(**inputs).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)[0]

        return self._scores_by_label(probabilities, model.config.id2label)

    def analyze_mental_health(self, text):
        """Return sigmoid scores for the ``MENTAL_HEALTH_LABELS`` indicators.

        Raises:
            ValueError: If the clinical model emits fewer logits than there
                are indicator names.
        """
        inputs = self.tokenizers['clinical'](
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        ).to(self.device)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            logits = self.models['clinical'](**inputs).logits
        scores = torch.sigmoid(logits)[0]

        expected = len(self.MENTAL_HEALTH_LABELS)
        if scores.shape[-1] < expected:
            raise ValueError(
                f"clinical model returned {scores.shape[-1]} scores, "
                f"expected at least {expected}"
            )
        return {
            label: float(score)
            for label, score in zip(self.MENTAL_HEALTH_LABELS, scores)
        }

    @staticmethod
    def _scores_by_label(values, id2label):
        """Map a 1-D score tensor to ``{label: float}``.

        ``id2label`` maps class index to label name; an index without an
        entry is reported under its decimal string.
        """
        return {
            str(id2label.get(index, index)): float(value)
            for index, value in enumerate(values)
        }