# ANALYSE_AGENT / utils / audit / audit_audio.py
# Ilyas KHIAT
# chatbot
# 1e3f619
import numpy as np
import scipy.io.wavfile as wavfile
from pydub import AudioSegment
import io
import tiktoken
from openai import OpenAI
def transcript_audio_func(audio_file):
    """Transcribe *audio_file* with the OpenAI ``whisper-1`` model.

    A new ``OpenAI`` client is created on every call.

    Args:
        audio_file: The audio payload, forwarded unchanged as the ``file``
            argument of ``client.audio.transcriptions.create``.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    response = OpenAI().audio.transcriptions.create(
        file=audio_file,
        model="whisper-1",
    )
    return response.text
def count_tokens(input_string: str) -> int:
    """Return the number of ``cl100k_base`` tokens in *input_string*.

    Args:
        input_string: Text to encode with the tiktoken ``cl100k_base``
            encoding.

    Returns:
        Length of the encoded token sequence.
    """
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(input_string))
# Function to calculate SNR
def calculate_snr(audio_data):
    """Estimate a signal-to-noise ratio (in dB) for a block of samples.

    The estimate is ``10 * log10(mean(x**2) / mean((x - mean(x))**2))``:
    the mean square of the raw samples divided by the mean square of the
    mean-removed samples.

    NOTE(review): this is the ratio of total power to variance, not a
    conventional SNR against a measured noise floor -- confirm it is the
    intended metric.

    Args:
        audio_data: Array-like of numeric samples (any dtype).

    Returns:
        The ratio in decibels. ``inf`` when the samples are constant and
        non-zero (zero "noise" power), ``nan`` when the input is empty or
        all zeros (the ratio is undefined).
    """
    # Promote to float64 before squaring: integer PCM samples (e.g. int16)
    # would otherwise wrap around in ``** 2`` and corrupt the power estimate.
    samples = np.asarray(audio_data, dtype=np.float64)
    if samples.size == 0:
        return float("nan")

    signal_power = np.mean(samples ** 2)
    noise_power = np.mean((samples - np.mean(samples)) ** 2)

    # Make the degenerate cases explicit instead of relying on NumPy's
    # divide-by-zero warnings to produce inf/nan.
    if noise_power == 0:
        return float("inf") if signal_power > 0 else float("nan")

    return 10 * np.log10(signal_power / noise_power)
# Function to evaluate audio quality
def _rewind_stream(stream) -> None:
    """Best-effort reset of *stream* to its start; no-op if it cannot seek."""
    seek = getattr(stream, "seek", None)
    if seek is None:
        return
    try:
        seek(0)
    except (OSError, ValueError):
        # Non-seekable or closed stream: leave the position untouched and let
        # the consumer deal with it, as the code did before rewinding existed.
        pass


def evaluate_audio_quality(file) -> dict:
    """Decode an audio file and collect quality metrics plus its transcript.

    Args:
        file: Audio source accepted by ``AudioSegment.from_file``. If direct
            decoding fails, ``file.read()`` is used to build an in-memory
            copy, so in that case it must be a readable binary stream. The
            same object is also passed to ``transcript_audio_func``.

    Returns:
        A dict with two entries:

        * ``"audit"``: ``volume`` (``audio.dBFS``), ``SNR`` (from
          ``calculate_snr``), ``duration`` in minutes, ``number_of_tokens``
          and ``number_of_words`` of the transcription.
        * ``"content"``: the ``transcription`` text, the ``audio_data``
          sample array and the ``frame_rate``.
    """
    try:
        audio = AudioSegment.from_file(file)
    except Exception:
        # Fallback: give pydub an in-memory copy. Rewind first, because the
        # failed attempt may already have consumed part of the stream.
        _rewind_stream(file)
        audio = AudioSegment.from_file(io.BytesIO(file.read()))

    audio_data = np.array(audio.get_array_of_samples())

    # Duration in minutes. The sample array interleaves all channels, so its
    # length is not a frame count; rely on pydub's own duration instead.
    duration = audio.duration_seconds / 60

    volume = audio.dBFS
    snr = calculate_snr(audio_data)

    # Decoding may have left the stream at EOF; rewind so the transcription
    # call receives the whole file.
    _rewind_stream(file)
    transcription = transcript_audio_func(file)

    audit = {
        "volume": volume,
        "SNR": snr,
        "duration": duration,
        "number_of_tokens": count_tokens(transcription),
        "number_of_words": len(transcription.split()),
    }
    content = {
        "transcription": transcription,
        "audio_data": audio_data,
        "frame_rate": audio.frame_rate,
    }
    return {
        "audit": audit,
        "content": content,
    }