# Prthameshh's picture
# a5a12ec verified
import json
import re
import tempfile
import os
import streamlit as st
from deepgram import DeepgramClient, PrerecordedOptions, FileSource
import os
from dotenv import load_dotenv
# Load the environment variables from the .env file so that DG_KEY can be
# supplied there as well as through the process environment.
load_dotenv()
# Access the API key
DG_KEY = os.getenv("DG_KEY")
# Module-level Deepgram client, used by transcribe_audio_file().
deepgram = DeepgramClient(DG_KEY)
# Function to transcribe an audio file
def transcribe_audio_file(audio_file_path):
    """Send a local audio file to Deepgram and return the raw response.

    Args:
        audio_file_path: Path of the audio file to read from disk.

    Returns:
        Whatever the prerecorded ``transcribe_file`` call returns, unmodified.

    Raises:
        OSError: If the audio file cannot be opened or read.
    """
    # Read the audio file from the local path
    with open(audio_file_path, "rb") as audio_file:
        buffer_data = audio_file.read()

    # Define the transcription options
    options = {
        "model": "nova-2",
        "smart_format": True,
        "language": "hi",  # alternatively 'en'
        "diarize": True,  # needed so every word carries a speaker index
        "profanity_filter": False,
    }
    payload = {
        "buffer": buffer_data,
    }
    # Call the transcribe_file method with the audio buffer and options
    response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)
    return response
def process_diarized_transcript(res):
    """Group the words of a diarized transcript into per-speaker sentences.

    Only the first alternative of the first channel is used. A new entry is
    started whenever the speaker changes or a word ends with sentence
    punctuation.

    Args:
        res: Subscriptable transcription response exposing
            ``res['results']['channels'][0]['alternatives'][0]['words']``,
            where every word has ``'speaker'`` and ``'punctuated_word'``.

    Returns:
        A list of ``(speaker, sentence)`` tuples in spoken order. The list is
        empty when the response has no channels, alternatives or words.
    """
    try:
        words = res['results']['channels'][0]['alternatives'][0]['words']
    except (KeyError, IndexError):
        # Nothing was recognised; the caller treats an empty list as
        # "no transcription available".
        return []

    # '\u0964' is the Devanagari danda, the full stop used in Hindi text
    # (the transcription request in this file uses language "hi").
    sentence_endings = ('.', '?', '!', '\u0964')

    current_speaker = None
    current_sentence = []
    output = []

    for word in words or ():
        # This checks if the speaker has changed from the previous word.
        if current_speaker != word['speaker']:
            # Flush what the previous speaker said before switching.
            if current_sentence:
                output.append((current_speaker, ' '.join(current_sentence)))
                current_sentence = []
            current_speaker = word['speaker']  # This updates the current speaker.

        # Adds the current word to the sentence being built.
        current_sentence.append(word['punctuated_word'])

        # This checks if the current word ends a sentence (by punctuation).
        if word['punctuated_word'].endswith(sentence_endings):
            output.append((current_speaker, ' '.join(current_sentence)))
            current_sentence = []

    # Adds any remaining words as a final sentence.
    if current_sentence:
        output.append((current_speaker, ' '.join(current_sentence)))

    return output
def format_speaker(speaker_num):
    """Return the display label for a speaker index, e.g. ``speaker 0``."""
    label = "speaker {}".format(speaker_num)
    return label
def transcribe_and_process_audio(audio_file_path):
    """Transcribe an audio file and render it as speaker-labelled lines.

    Args:
        audio_file_path: Path of the audio file to transcribe.

    Returns:
        The transcript as one string with a ``speaker N: sentence`` line per
        entry, or a fixed explanatory message when nothing was transcribed.
    """
    # Transcribe the audio file
    res = transcribe_audio_file(audio_file_path)
    # Process the diarized transcript
    diarized_result = process_diarized_transcript(res)
    # Check if the result is available
    if not diarized_result:
        return "No transcription available. The audio might still be too low quality or silent."

    # Build the whole transcript in one pass instead of repeated string +=.
    transcription = "".join(
        f"{format_speaker(speaker)}: {sentence}\n"
        for speaker, sentence in diarized_result
    )

    # Write the result to a text file. Text mode with UTF-8 is required:
    # the default binary mode rejects str, and the transcript may be Hindi.
    # NOTE(review): the file is kept on disk (delete=False) but its path is
    # never returned, so nothing can find or remove it - confirm it is needed.
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", delete=False, suffix=".txt"
    ) as temp_file:
        temp_file.write(transcription)

    return transcription
# Streamlit interface
st.title("Audio Transcription and Diarization")
uploaded_file = st.file_uploader("Choose an audio file", type=["mp3", "wav", "m4a"])
if uploaded_file is not None:
    # transcribe_and_process_audio() takes a path, so the in-memory upload is
    # first copied to a temporary file on disk.
    with tempfile.NamedTemporaryFile(delete=False) as temp_audio_file:
        temp_audio_file.write(uploaded_file.getvalue())
        temp_audio_file_path = temp_audio_file.name
    # The transcription runs after the with-block so the temporary file is
    # flushed and closed before it is read back.
    st.write("Transcribing audio...")
    transcription = transcribe_and_process_audio(temp_audio_file_path)