File size: 1,781 Bytes
1e3f619
b31069e
 
 
 
7220677
5c19064
 
 
 
 
0b57b35
5c19064
 
 
 
7220677
 
 
 
 
b31069e
 
 
 
 
 
 
 
 
 
 
7220677
f1342ba
 
 
 
 
b31069e
7220677
 
ef73c14
7220677
b31069e
 
 
 
 
7220677
 
5c19064
b31069e
ef73c14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

import numpy as np
import scipy.io.wavfile as wavfile
from pydub import AudioSegment
import io
import tiktoken
from openai import OpenAI

def transcript_audio_func(audio_file):
    """Transcribe *audio_file* with the OpenAI ``whisper-1`` model.

    A fresh ``OpenAI()`` client is built with default arguments on every
    call, so no client state is shared between invocations.

    Args:
        audio_file: The audio payload, forwarded untouched as the ``file``
            argument of the transcription request.
            NOTE(review): presumably a binary file-like object opened at
            its start -- confirm against callers.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    request_args = {"model": "whisper-1", "file": audio_file}
    response = OpenAI().audio.transcriptions.create(**request_args)
    return response.text

def count_tokens(input_string: str) -> int:
    """Return the number of ``cl100k_base`` tokens that *input_string* encodes to.

    Args:
        input_string: Text to tokenize.

    Returns:
        Length of the token sequence produced by the ``cl100k_base``
        tiktoken encoding.
    """
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(input_string))

# Function to calculate SNR
def calculate_snr(audio_data):
    """Return, in dB, the ratio of total power to mean-removed power.

    The "noise" term used here is the input with its mean (DC offset)
    subtracted, so the value is ``10 * log10(mean(x**2) / var(x))``. It is
    therefore never negative and only exceeds 0 dB when the samples carry a
    DC offset; it is not a measurement of background noise.

    Args:
        audio_data: Array-like of numeric samples. Integer PCM arrays are
            accepted; they are promoted to ``float64`` before squaring.

    Returns:
        The ratio in decibels as a NumPy float. ``inf`` when the samples
        are constant and non-zero (zero variance), ``nan`` when the input
        is empty or all zeros.
    """
    # Promote first: squaring int16 samples in their own dtype wraps around
    # silently and yields a meaningless power value.
    samples = np.asarray(audio_data, dtype=np.float64)
    if samples.size == 0:
        return np.float64(np.nan)

    centered = samples - np.mean(samples)
    signal_power = np.mean(samples ** 2)
    noise_power = np.mean(centered ** 2)

    # Zero variance makes the ratio x/0 or 0/0; let NumPy produce inf/nan
    # without emitting RuntimeWarnings for what is a documented outcome.
    with np.errstate(divide="ignore", invalid="ignore"):
        return 10 * np.log10(signal_power / noise_power)

# Function to evaluate audio quality
def evaluate_audio_quality(file) -> dict:
    """Decode an audio input, measure it, transcribe it, and bundle the results.

    Args:
        file: Audio source handed to ``AudioSegment.from_file`` and then to
            ``transcript_audio_func``. If direct decoding fails and the
            object has a ``read`` method, its full contents are buffered in
            memory and decoding is retried.
            NOTE(review): presumably a binary file-like object (e.g. an
            upload) -- confirm against callers.

    Returns:
        A dict with two entries:

        * ``"audit"``: ``volume`` (``audio.dBFS``), ``SNR`` (result of
          ``calculate_snr``), ``duration`` in minutes,
          ``number_of_tokens`` and ``number_of_words`` of the
          transcription.
        * ``"content"``: the ``transcription`` text, the decoded
          ``audio_data`` sample array, and the ``frame_rate``.

    Raises:
        Exception: Whatever the decoder raised, when the first decode fails
            and ``file`` has no ``read`` method to fall back on.
    """
    try:
        audio = AudioSegment.from_file(file)
    except Exception:
        # Without a read() there is nothing to buffer; surface the real
        # decoding error instead of masking it with an AttributeError.
        if not hasattr(file, "read"):
            raise
        # The failed attempt may have consumed part of the stream.
        _rewind(file)
        audio = AudioSegment.from_file(io.BytesIO(file.read()))

    audio_data = np.array(audio.get_array_of_samples())

    # Duration in minutes. The sample array interleaves channels, so its
    # length is not a frame count; use the segment's own duration instead.
    duration = audio.duration_seconds / 60

    # Calculate volume
    volume = audio.dBFS

    # Calculate SNR
    snr = calculate_snr(audio_data)

    # Decoding leaves a file-like object positioned at its end; rewind so
    # the transcription request uploads the whole payload.
    _rewind(file)
    transcription = transcript_audio_func(file)

    audit = {
        "volume": volume,
        "SNR": snr,
        "duration": duration,
        "number_of_tokens": count_tokens(transcription),
        "number_of_words": len(transcription.split()),
    }

    content = {
        "transcription": transcription,
        "audio_data": audio_data,
        "frame_rate": audio.frame_rate,
    }

    return {
        "audit": audit,
        "content": content,
    }


def _rewind(stream) -> None:
    """Seek *stream* back to offset 0 if it supports seeking; otherwise do nothing."""
    seek = getattr(stream, "seek", None)
    if seek is None:
        return
    try:
        seek(0)
    except (OSError, ValueError):
        # Best effort: non-seekable or closed streams are left as they are.
        pass