Spaces:
Running
Running
import numpy as np | |
import scipy.io.wavfile as wavfile | |
from pydub import AudioSegment | |
import io | |
import tiktoken | |
from openai import OpenAI | |
def transcript_audio_func(audio_file):
    """Transcribe ``audio_file`` with the ``whisper-1`` model.

    A fresh ``OpenAI`` client is constructed on every call and
    ``audio_file`` is forwarded unchanged as the ``file`` argument of
    ``client.audio.transcriptions.create``.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    request = {"model": "whisper-1", "file": audio_file}
    response = OpenAI().audio.transcriptions.create(**request)
    return response.text
def count_tokens(input_string: str) -> int:
    """Return how many ``cl100k_base`` tokens ``input_string`` encodes to."""
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(input_string))
def calculate_snr(audio_data):
    """Estimate a signal-to-noise ratio, in decibels, for a block of samples.

    The "noise" term is the input with its mean removed, so the value
    returned is ``10 * log10(mean(x**2) / mean((x - mean(x))**2))``: the
    ratio of the mean square of the samples to their variance. This is a
    rough heuristic rather than a measurement against a known noise floor.

    Args:
        audio_data: Array-like of numeric samples. Integer inputs are
            accepted; all arithmetic is carried out in float64.

    Returns:
        The ratio in dB as a NumPy float. ``nan`` for empty or all-zero
        input, ``inf`` for a constant non-zero input (zero variance).
    """
    # Squaring in the input dtype wraps around for integer PCM samples
    # (e.g. 30000 ** 2 does not fit in int16), so promote to float64 first.
    samples = np.asarray(audio_data, dtype=np.float64)
    if samples.size == 0:
        # np.mean of an empty array is nan; return it without the warning.
        return np.float64(np.nan)

    noise = samples - samples.mean()
    signal_power = np.mean(samples ** 2)
    noise_power = np.mean(noise ** 2)

    # Zero variance gives x/0 (inf) or 0/0 (nan); keep those results but
    # silence the floating-point warnings they would otherwise raise.
    with np.errstate(divide="ignore", invalid="ignore"):
        return 10 * np.log10(signal_power / noise_power)
def _rewind(stream) -> None:
    """Move ``stream`` back to its start when it supports seeking."""
    seek = getattr(stream, "seek", None)
    if seek is None:
        return
    try:
        seek(0)
    except (OSError, ValueError):
        # Best effort: a non-seekable or closed stream is left where it is.
        pass


def evaluate_audio_quality(file) -> dict:
    """Decode an audio source, measure it, and transcribe it.

    Args:
        file: Audio source passed to ``AudioSegment.from_file`` and then to
            ``transcript_audio_func``. NOTE(review): presumably a binary
            file-like object, since the fallback path calls ``file.read()``
            -- confirm against callers.

    Returns:
        A dict with two entries:

        * ``"audit"``: ``volume`` (``audio.dBFS``), ``SNR`` (from
          ``calculate_snr``), ``duration`` in minutes, ``number_of_tokens``
          and ``number_of_words`` of the transcription.
        * ``"content"``: the ``transcription`` text, the decoded samples as
          a NumPy array (``audio_data``) and the ``frame_rate``.

    Raises:
        Exception: whatever ``AudioSegment.from_file`` raised, when ``file``
            has no ``read`` method and the in-memory fallback cannot apply.
    """
    try:
        audio = AudioSegment.from_file(file)
    except Exception:
        if not hasattr(file, "read"):
            # Nothing to buffer; surface the real decoding error.
            raise
        # Retry from an in-memory copy. The failed attempt may have moved
        # the read position, so start again from the beginning.
        _rewind(file)
        audio = AudioSegment.from_file(io.BytesIO(file.read()))

    audio_data = np.array(audio.get_array_of_samples())

    # Duration in minutes. Taken from the segment itself so the result does
    # not depend on how many channels are packed into the sample array.
    duration = audio.duration_seconds / 60

    volume = audio.dBFS
    snr = calculate_snr(audio_data)

    # Decoding may have consumed the stream; rewind so the transcription
    # call receives the audio from the start.
    _rewind(file)
    transcription = transcript_audio_func(file)

    audit = {
        "volume": volume,
        "SNR": snr,
        "duration": duration,
        "number_of_tokens": count_tokens(transcription),
        "number_of_words": len(transcription.split()),
    }
    content = {
        "transcription": transcription,
        "audio_data": audio_data,
        "frame_rate": audio.frame_rate,
    }
    return {
        "audit": audit,
        "content": content,
    }