WillHeld committed · Commit f7e8ea0 · 0 Parent(s)

Try Starting Space from Scratch

Files changed (7)
  1. .gitattributes +35 -0
  2. .gitignore +2 -0
  3. README.md +13 -0
  4. app.py +213 -0
  5. files.txt +0 -0
  6. packages.txt +1 -0
  7. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ *__pycache__*
+ user_study.json
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Diva Audio
+ emoji: 🔊
+ colorFrom: gray
+ colorTo: red
+ sdk: gradio
+ sdk_version: 5.9.1
+ app_file: app.py
+ pinned: false
+ license: mpl-2.0
+ ---
+
+ An example voice chatbot using [Gradio](https://gradio.app) and [Transformers](https://huggingface.co/docs/transformers/index), streaming spoken-question responses from [DiVA Llama 3 8B](https://huggingface.co/WillHeld/DiVA-llama-3-v0-8b).
app.py ADDED
@@ -0,0 +1,213 @@
+ import copy
+ import os
+ import random
+ import sys
+
+ import xxhash
+ import gradio as gr
+ import librosa
+ import numpy as np
+ import soundfile as sf
+ import torch
+ import torch.nn.functional as F
+ from accelerate import infer_auto_device_map
+ from datasets import Audio
+ from safetensors.torch import load, load_model
+ import spaces
+ from torch import nn
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoProcessor,
+     AutoTokenizer,
+     LlamaForCausalLM,
+     TextIteratorStreamer,
+     WhisperForConditionalGeneration,
+     AutoModel,
+ )
+ from transformers.generation import GenerationConfig
+
+ anonymous = False
+
+ diva_model = AutoModel.from_pretrained(
+     "WillHeld/DiVA-llama-3-v0-8b", trust_remote_code=True
+ )
+
+ resampler = Audio(sampling_rate=16_000)
+
+
+ @spaces.GPU
+ @torch.no_grad
+ def diva_audio(audio_input, do_sample=False, temperature=0.001):
+     sr, y = audio_input
+     x = xxhash.xxh32(bytes(y)).hexdigest()
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+     a = resampler.decode_example(
+         resampler.encode_example({"array": y, "sampling_rate": sr})
+     )
+     yield from diva_model.generate_stream(
+         a["array"], None, do_sample=do_sample, max_new_tokens=256
+     )
+
+
+ def transcribe_wrapper(audio_input, state, model_order):
+     spinner = "◒"
+     d_resp = gr.Textbox(
+         value="♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪loading♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪loading♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪♫♪",
+         visible=True,
+         label=model_names[0] if not anonymous else f"Model {model_order}",
+     )
+     yield (
+         gr.Button(
+             value="Loading Weights onto ZeroGPU...",
+             interactive=False,
+             variant="primary",
+         ),
+         d_resp,
+         state,
+     )
+
+     yield from transcribe(audio_input, state, model_order)
+
+
+ @spaces.GPU
+ def transcribe(audio_input, state, model_order):
+     if audio_input is None:
+         yield (
+             "Click to run inference!",
+             "",
+             state,
+         )
+         return
+
+     def gen_from_diva():
+         diva_resp = diva_audio(audio_input)
+         for resp in diva_resp:
+             d_resp = gr.Textbox(
+                 value=resp,
+                 visible=True,
+                 label=model_names[0] if not anonymous else f"Model {model_order}",
+             )
+             yield d_resp
+
+     spinner_id = 0
+     spinners = ["◐", "◓", "◑", "◒"]
+
+     for response in gen_from_diva():
+         spinner = spinners[spinner_id]
+         spinner_id = (spinner_id + 1) % 4
+         yield (
+             gr.Button(
+                 value=spinner + " Generating Responses " + spinner,
+                 interactive=False,
+                 variant="primary",
+             ),
+             response,
+             state,
+         )
+     yield (
+         gr.Button(value="Click to run inference!", interactive=True, variant="primary"),
+         response,
+         state,
+     )
+
+
+ def on_page_load(state, model_order):
+     if state == 0:
+         gr.Info(
+             "Record something you'd say to an AI Assistant! Think about what you usually use Siri, Google Assistant, or ChatGPT for."
+         )
+         state = 1
+         if anonymous:
+             random.shuffle(model_order)
+     return state, model_order
+
+
+ def recording_complete(state):
+     if state == 1:
+         gr.Info(
+             "Once you submit your recording, DiVA will stream back a response! This might take a second as ZeroGPU needs to load model weights into VRAM!"
+         )
+         state = 2
+     return (
+         gr.Button(value="Click to run inference!", interactive=True, variant="primary"),
+         state,
+     )
+
+
+ def clear_factory(button_id):
+     def clear(audio_input, model_order):
+         return (
+             model_order,
+             gr.Button(
+                 value="Record Audio to Submit!",
+                 interactive=False,
+             ),
+             None,
+             None,
+         )
+
+     return clear
+
+
+ theme = gr.themes.Soft(
+     primary_hue=gr.themes.Color(
+         c100="#82000019",
+         c200="#82000033",
+         c300="#8200004c",
+         c400="#82000066",
+         c50="#8200007f",
+         c500="#8200007f",
+         c600="#82000099",
+         c700="#820000b2",
+         c800="#820000cc",
+         c900="#820000e5",
+         c950="#820000f2",
+     ),
+     secondary_hue="rose",
+     neutral_hue="stone",
+ )
+
+ model_names = ["DiVA Llama 3 8B"]
+ model_shorthand = ["diva"]
+ with gr.Blocks(theme=theme) as demo:
+     state = gr.State(0)
+     model_order = gr.State([0, 1])
+     with gr.Row():
+         audio_input = gr.Audio(
+             sources=["microphone"], streaming=False, label="Audio Input"
+         )
+
+     with gr.Row():
+         btn = gr.Button(value="Record Audio to Submit!", interactive=False)
+
+     with gr.Row():
+         out1 = gr.Textbox(visible=False)
+
+     audio_input.stop_recording(
+         recording_complete,
+         [state],
+         [btn, state],
+     )
+     audio_input.start_recording(
+         lambda: gr.Button(
+             value="Uploading Audio to Cloud", interactive=False, variant="primary"
+         ),
+         None,
+         btn,
+     )
+     btn.click(
+         fn=transcribe_wrapper,
+         inputs=[audio_input, state, model_order],
+         outputs=[btn, out1, state],
+     )
+     audio_input.clear(
+         clear_factory(None),
+         [audio_input, model_order],
+         [model_order, btn, audio_input, out1],
+     )
+     demo.load(
+         fn=on_page_load, inputs=[state, model_order], outputs=[state, model_order]
+     )
+
+ demo.launch(share=True)
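
For context on the inference path above: diva_audio normalizes the raw microphone buffer, resamples it to 16 kHz via datasets.Audio, and streams text from generate_stream. A minimal sketch of driving the same checkpoint outside Gradio, assuming the generate_stream signature used in app.py (the WAV filename is hypothetical):

    # Sketch only: stream a DiVA response for a local recording, mirroring
    # the preprocessing in diva_audio() above. "question.wav" is hypothetical.
    import librosa
    from transformers import AutoModel

    model = AutoModel.from_pretrained(
        "WillHeld/DiVA-llama-3-v0-8b", trust_remote_code=True
    )
    speech, _ = librosa.load("question.wav", sr=16_000)  # float32 mono at 16 kHz
    for partial in model.generate_stream(
        speech, None, do_sample=False, max_new_tokens=256
    ):
        print(partial)  # each yield appears to be the response text so far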
files.txt ADDED
File without changes
packages.txt ADDED
@@ -0,0 +1 @@
+ ffmpeg
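
On Spaces, packages.txt entries are installed as system (apt) packages; ffmpeg is presumably here so the audio stack (Gradio's microphone component and librosa) can decode compressed browser recordings.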
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ transformers==4.45.2
+ accelerate
+ peft
+ librosa
+ torchaudio
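
One gap worth flagging: app.py also imports datasets and xxhash, neither of which is listed here (librosa already pulls in soundfile, and gradio and spaces are supplied by the Spaces SDK runtime). If the build fails on those imports, an extended pin list along these lines should cover it (an assumption, not part of the commit):

    transformers==4.45.2
    accelerate
    peft
    librosa
    torchaudio
    datasets
    xxhash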