Spaces:
Running
Running
sanchit-gandhi
committed on
Commit
·
d3e0df2
1
Parent(s):
f9dc7b0
use base64 encoding for faster file transfer
Browse files
app.py
CHANGED
@@ -1,6 +1,9 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import requests
|
3 |
from transformers.models.whisper.tokenization_whisper import TO_LANGUAGE_CODE
|
|
|
4 |
|
5 |
|
6 |
title = "Whisper JAX: The Fastest Whisper API ⚡️"
|
@@ -10,7 +13,7 @@ description = "Whisper JAX is an optimised implementation of the [Whisper model]
|
|
10 |
|
11 |
API_URL = "https://whisper-jax.ngrok.io/generate/"
|
12 |
|
13 |
-
article = "Whisper large-v2 model by OpenAI. Backend running JAX on a TPU v4-8 through the generous support of the [TRC](https://sites.research.google/trc/about/) programme."
|
14 |
|
15 |
language_names = sorted(TO_LANGUAGE_CODE.keys())
|
16 |
SAMPLING_RATE = 16000
|
@@ -56,7 +59,11 @@ def transcribe_audio(microphone, file_upload, task, return_timestamps):
|
|
56 |
|
57 |
inputs = microphone if microphone is not None else file_upload
|
58 |
|
59 |
-
inputs
|
|
|
|
|
|
|
|
|
60 |
|
61 |
text, timestamps = inference(inputs=inputs, task=task, return_timestamps=return_timestamps)
|
62 |
|
@@ -83,8 +90,8 @@ def transcribe_youtube(yt_url, task, return_timestamps):
|
|
83 |
audio = gr.Interface(
|
84 |
fn=transcribe_audio,
|
85 |
inputs=[
|
86 |
-
gr.inputs.Audio(source="microphone", optional=True),
|
87 |
-
gr.inputs.Audio(source="upload", optional=True),
|
88 |
gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
|
89 |
gr.inputs.Checkbox(default=False, label="Return timestamps"),
|
90 |
],
|
|
|
1 |
+
import base64
|
2 |
+
|
3 |
import gradio as gr
|
4 |
import requests
|
5 |
from transformers.models.whisper.tokenization_whisper import TO_LANGUAGE_CODE
|
6 |
+
from transformers.pipelines.audio_utils import ffmpeg_read
|
7 |
|
8 |
|
9 |
title = "Whisper JAX: The Fastest Whisper API ⚡️"
|
|
|
13 |
|
14 |
API_URL = "https://whisper-jax.ngrok.io/generate/"
|
15 |
|
16 |
+
article = "Whisper large-v2 model by OpenAI. Backend running JAX on a TPU v4-8 through the generous support of the [TRC](https://sites.research.google/trc/about/) programme. Whisper JAX code and Gradio demo by 🤗 Hugging Face."
|
17 |
|
18 |
language_names = sorted(TO_LANGUAGE_CODE.keys())
|
19 |
SAMPLING_RATE = 16000
|
|
|
59 |
|
60 |
inputs = microphone if microphone is not None else file_upload
|
61 |
|
62 |
+
with open(inputs, "rb") as f:
|
63 |
+
inputs = f.read()
|
64 |
+
|
65 |
+
inputs = ffmpeg_read(inputs, SAMPLING_RATE)
|
66 |
+
inputs = {"array": base64.b64encode(inputs.tobytes()), "sampling_rate": SAMPLING_RATE}
|
67 |
|
68 |
text, timestamps = inference(inputs=inputs, task=task, return_timestamps=return_timestamps)
|
69 |
|
|
|
90 |
audio = gr.Interface(
|
91 |
fn=transcribe_audio,
|
92 |
inputs=[
|
93 |
+
gr.inputs.Audio(source="microphone", optional=True, type="filepath"),
|
94 |
+
gr.inputs.Audio(source="upload", optional=True, type="filepath"),
|
95 |
gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
|
96 |
gr.inputs.Checkbox(default=False, label="Return timestamps"),
|
97 |
],
|