Spaces:
Runtime error
Runtime error
harveysamson
committed on
Commit
·
cd87e9f
1
Parent(s):
5d47fc0
added comments
Browse files
- app.py +4 -2
- src/modeling_outputs.py +2 -0
- src/models.py +2 -0
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import torch
|
2 |
import torch.nn.functional as F
|
3 |
from transformers import AutoConfig, Wav2Vec2FeatureExtractor
|
4 |
-
from src.models import Wav2Vec2ForSpeechClassification
|
5 |
import gradio as gr
|
6 |
import librosa
|
7 |
|
@@ -12,6 +12,7 @@ feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
|
|
12 |
sampling_rate = feature_extractor.sampling_rate
|
13 |
model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path)
|
14 |
|
|
|
15 |
def load_data(path):
|
16 |
speech, sampling_rate = librosa.load(path)
|
17 |
if len(speech.shape) > 1:
|
@@ -20,6 +21,7 @@ def load_data(path):
|
|
20 |
speech = librosa.resample(speech, sampling_rate,16000)
|
21 |
return speech
|
22 |
|
|
|
23 |
def inference(path):
|
24 |
speech = load_data(path)
|
25 |
inputs = feature_extractor(speech, return_tensors="pt").input_values
|
@@ -32,7 +34,7 @@ def inference(path):
|
|
32 |
inputs = gr.inputs.Audio(label="Input Audio", type="filepath", source="upload")
|
33 |
outputs = gr.outputs.Label(type="confidences", label = "Output Scores")
|
34 |
title = "Wav2Vec2 Speech Emotion Recognition"
|
35 |
-
description = "This is a demo of the Wav2Vec2 Speech Emotion Recognition model. Upload
|
36 |
examples = ['data/heart.wav', 'data/happy26.wav', 'data/jm24.wav', 'data/newton.wav', 'data/speeding.wav']
|
37 |
article = "<a href = 'https://github.com/m3hrdadfi/soxan'> Wav2Vec2 Speech Classification Github Repository"
|
38 |
|
|
|
1 |
import torch
|
2 |
import torch.nn.functional as F
|
3 |
from transformers import AutoConfig, Wav2Vec2FeatureExtractor
|
4 |
+
from src.models import Wav2Vec2ForSpeechClassification #imported from https://github.com/m3hrdadfi/soxan
|
5 |
import gradio as gr
|
6 |
import librosa
|
7 |
|
|
|
12 |
sampling_rate = feature_extractor.sampling_rate
|
13 |
model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path)
|
14 |
|
15 |
+
#load input file and resample to 16kHz
|
16 |
def load_data(path):
|
17 |
speech, sampling_rate = librosa.load(path)
|
18 |
if len(speech.shape) > 1:
|
|
|
21 |
speech = librosa.resample(speech, sampling_rate,16000)
|
22 |
return speech
|
23 |
|
24 |
+
#modified version of predict function from https://github.com/m3hrdadfi/soxan
|
25 |
def inference(path):
|
26 |
speech = load_data(path)
|
27 |
inputs = feature_extractor(speech, return_tensors="pt").input_values
|
|
|
34 |
inputs = gr.inputs.Audio(label="Input Audio", type="filepath", source="upload")
|
35 |
outputs = gr.outputs.Label(type="confidences", label = "Output Scores")
|
36 |
title = "Wav2Vec2 Speech Emotion Recognition"
|
37 |
+
description = "This is a demo of the Wav2Vec2 Speech Emotion Recognition model. Upload an audio file and the top emotions predicted will be displayed."
|
38 |
examples = ['data/heart.wav', 'data/happy26.wav', 'data/jm24.wav', 'data/newton.wav', 'data/speeding.wav']
|
39 |
article = "<a href = 'https://github.com/m3hrdadfi/soxan'> Wav2Vec2 Speech Classification Github Repository"
|
40 |
|
src/modeling_outputs.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
from dataclasses import dataclass
|
2 |
from typing import Optional, Tuple
|
3 |
import torch
|
|
|
1 |
+
#imported from https://github.com/m3hrdadfi/soxan to implement Wav2Vec2 for speech classification
|
2 |
+
|
3 |
from dataclasses import dataclass
|
4 |
from typing import Optional, Tuple
|
5 |
import torch
|
src/models.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import torch
|
2 |
import torch.nn as nn
|
3 |
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
|
|
1 |
+
#imported from https://github.com/m3hrdadfi/soxan to implement Wav2Vec2 for speech classification
|
2 |
+
|
3 |
import torch
|
4 |
import torch.nn as nn
|
5 |
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|