# verbisense/src/audio_processor.py
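"""Audio transcription helpers for Verbisense, built on OpenAI's Whisper model."""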
import whisper
import requests
import ffmpeg
import numpy as np
from typing import List, Dict, Any


def process_audio_from_url(audio_url: str) -> List[Dict[str, Any]]:
    """Download an audio file, decode it with ffmpeg, and transcribe it with Whisper."""
    # Download the audio file content
    response = requests.get(audio_url, stream=True)
    response.raise_for_status()

    # Use ffmpeg to decode the audio stream into 16 kHz mono float32 PCM,
    # the sample rate and format that Whisper expects
    try:
        out, _ = (
            ffmpeg
            .input('pipe:0')
            .output('pipe:1', format='f32le', acodec='pcm_f32le', ac=1, ar=16000)
            .run(input=response.content, capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    # Convert the raw PCM bytes to a flat float32 array
    audio = np.frombuffer(out, np.float32).flatten()

    # Delegate transcription to the shared helper; the file name is taken from the URL
    return process_audio_data(audio, audio_url.split("/")[-1])


def process_audio_data(audio: np.ndarray, file_name: str) -> List[Dict[str, Any]]:
    """Transcribe a 16 kHz mono float32 audio array with Whisper."""
    # Load the Whisper model
    model = whisper.load_model("base")

    # Transcribe the audio
    result = model.transcribe(audio)

    # Collect one entry per segment, tagged with the source file name
    segments = []
    for segment in result["segments"]:
        segments.append({
            "file_name": file_name,
            "text": segment["text"]
        })
    return segments
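

# Minimal usage sketch, assuming the module is run directly; the URL below is a
# hypothetical placeholder, not an endpoint defined by the Verbisense project.
if __name__ == "__main__":
    sample_url = "https://example.com/sample.wav"  # hypothetical example URL
    for segment in process_audio_from_url(sample_url):
        print(segment["file_name"], segment["text"])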