from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import os
from pydub import AudioSegment
import aiofiles
import faster_whisper

# Initialize the FastAPI app
app = FastAPI()

# Initialize the model; faster-whisper selects the GPU automatically when one is available
model = faster_whisper.WhisperModel('ivrit-ai/faster-whisper-v2-d4')

# Define file paths
TEMP_FILE_PATH = "temp_audio_file.m4a"
WAV_FILE_PATH = "temp_audio_file.wav"


@app.post("/transcribe")
async def transcribe(request: Request):
    try:
        # Stream the request body directly to a temporary file on disk
        async with aiofiles.open(TEMP_FILE_PATH, 'wb') as out_file:
            async for chunk in request.stream():
                await out_file.write(chunk)
        print("File saved successfully.")

        # Convert M4A to WAV (pydub requires ffmpeg to be installed)
        try:
            audio = AudioSegment.from_file(TEMP_FILE_PATH, format="m4a")
            audio.export(WAV_FILE_PATH, format="wav")
            print("Conversion to WAV successful.")
        except Exception as e:
            print("Error during conversion:", e)
            return JSONResponse({"detail": "Error in audio conversion"}, status_code=400)

        # Transcribe the WAV audio file in Hebrew
        segments, _ = model.transcribe(WAV_FILE_PATH, language='he')
        transcribed_text = ' '.join(s.text for s in segments)

        return JSONResponse({"transcribed_text": transcribed_text})
    finally:
        # Clean up temporary files even if conversion or transcription fails
        for path in (TEMP_FILE_PATH, WAV_FILE_PATH):
            if os.path.exists(path):
                os.remove(path)
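
# --- Usage sketch (an illustrative assumption, not part of the server code above) ---
# Assuming this file is saved as main.py, the server can be started with:
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Because the endpoint reads the raw request body via request.stream(), a client
# posts the audio bytes directly rather than as a multipart form. The file name
# "recording.m4a" below is a hypothetical example.
#
# import requests
#
# with open("recording.m4a", "rb") as f:
#     resp = requests.post("http://localhost:8000/transcribe", data=f)
# print(resp.json()["transcribed_text"])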