Varun Aditya Balaji
committed on
Commit
·
a216922
1
Parent(s):
836ffb0
first commit
Browse files- 2.wav +0 -0
- 3.wav +0 -0
- Pipeline.ipynb +68 -40
- chunk0.wav +0 -0
- chunk1.wav +0 -0
- chunk2.wav +0 -0
- chunk3.wav +0 -0
- chunk4.wav +0 -0
- chunk5.wav +0 -0
- chunk6.wav +0 -0
- combine.wav +0 -0
- tmp/classifier.ckpt +1 -0
- tmp/embedding_model.ckpt +1 -0
- tmp/hyperparams.yaml +1 -0
- tmp/label_encoder.ckpt +1 -0
2.wav
ADDED
Binary file (740 kB). View file
|
|
3.wav
ADDED
Binary file (436 kB). View file
|
|
Pipeline.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"id": "edc2e2ff",
|
7 |
"metadata": {},
|
8 |
"outputs": [],
|
@@ -19,7 +19,7 @@
|
|
19 |
},
|
20 |
{
|
21 |
"cell_type": "code",
|
22 |
-
"execution_count":
|
23 |
"id": "76f25cc3",
|
24 |
"metadata": {},
|
25 |
"outputs": [
|
@@ -27,7 +27,7 @@
|
|
27 |
"name": "stdout",
|
28 |
"output_type": "stream",
|
29 |
"text": [
|
30 |
-
"
|
31 |
]
|
32 |
},
|
33 |
{
|
@@ -41,8 +41,8 @@
|
|
41 |
"name": "stdout",
|
42 |
"output_type": "stream",
|
43 |
"text": [
|
44 |
-
"
|
45 |
-
"
|
46 |
]
|
47 |
}
|
48 |
],
|
@@ -55,7 +55,7 @@
|
|
55 |
},
|
56 |
{
|
57 |
"cell_type": "code",
|
58 |
-
"execution_count":
|
59 |
"id": "3b142546",
|
60 |
"metadata": {},
|
61 |
"outputs": [],
|
@@ -82,8 +82,8 @@
|
|
82 |
},
|
83 |
{
|
84 |
"cell_type": "code",
|
85 |
-
"execution_count":
|
86 |
-
"id": "
|
87 |
"metadata": {},
|
88 |
"outputs": [
|
89 |
{
|
@@ -98,27 +98,34 @@
|
|
98 |
"output_type": "stream",
|
99 |
"text": [
|
100 |
"Detected language is English\n",
|
101 |
-
"
|
102 |
]
|
103 |
}
|
104 |
],
|
105 |
"source": [
|
106 |
-
"
|
107 |
-
"
|
108 |
-
"
|
109 |
-
" "
|
110 |
]
|
111 |
},
|
112 |
{
|
113 |
"cell_type": "code",
|
114 |
-
"execution_count":
|
115 |
-
"id": "
|
116 |
"metadata": {},
|
117 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
{
|
119 |
"name": "stderr",
|
120 |
"output_type": "stream",
|
121 |
"text": [
|
|
|
122 |
"It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
|
123 |
]
|
124 |
},
|
@@ -126,36 +133,57 @@
|
|
126 |
"name": "stdout",
|
127 |
"output_type": "stream",
|
128 |
"text": [
|
129 |
-
"
|
130 |
-
"
|
131 |
]
|
132 |
-
}
|
133 |
-
],
|
134 |
-
"source": [
|
135 |
-
"start = time.time()\n",
|
136 |
-
"pipeline('english.wav')\n",
|
137 |
-
"end = time.time()"
|
138 |
-
]
|
139 |
-
},
|
140 |
-
{
|
141 |
-
"cell_type": "code",
|
142 |
-
"execution_count": 49,
|
143 |
-
"id": "1e0321b5",
|
144 |
-
"metadata": {},
|
145 |
-
"outputs": [
|
146 |
{
|
147 |
-
"
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
}
|
156 |
],
|
157 |
"source": [
|
158 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
]
|
160 |
},
|
161 |
{
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 92,
|
6 |
"id": "edc2e2ff",
|
7 |
"metadata": {},
|
8 |
"outputs": [],
|
|
|
19 |
},
|
20 |
{
|
21 |
"cell_type": "code",
|
22 |
+
"execution_count": 93,
|
23 |
"id": "76f25cc3",
|
24 |
"metadata": {},
|
25 |
"outputs": [
|
|
|
27 |
"name": "stdout",
|
28 |
"output_type": "stream",
|
29 |
"text": [
|
30 |
+
"12/06/2022 13:42:19 - INFO - huggingsound.speech_recognition.model - Loading model...\n"
|
31 |
]
|
32 |
},
|
33 |
{
|
|
|
41 |
"name": "stdout",
|
42 |
"output_type": "stream",
|
43 |
"text": [
|
44 |
+
"12/06/2022 13:42:23 - WARNING - root - bos_token <s> not in provided tokens. It will be added to the list of tokens\n",
|
45 |
+
"12/06/2022 13:42:23 - WARNING - root - eos_token </s> not in provided tokens. It will be added to the list of tokens\n"
|
46 |
]
|
47 |
}
|
48 |
],
|
|
|
55 |
},
|
56 |
{
|
57 |
"cell_type": "code",
|
58 |
+
"execution_count": 94,
|
59 |
"id": "3b142546",
|
60 |
"metadata": {},
|
61 |
"outputs": [],
|
|
|
82 |
},
|
83 |
{
|
84 |
"cell_type": "code",
|
85 |
+
"execution_count": 95,
|
86 |
+
"id": "b0fae1dd",
|
87 |
"metadata": {},
|
88 |
"outputs": [
|
89 |
{
|
|
|
98 |
"output_type": "stream",
|
99 |
"text": [
|
100 |
"Detected language is English\n",
|
101 |
+
"NISHE JUAN FANMA HE MOVED ABOUT INVISIBLE BUT EVERYONE COULD HEAR HIM\n"
|
102 |
]
|
103 |
}
|
104 |
],
|
105 |
"source": [
|
106 |
+
"start = time.time()\n",
|
107 |
+
"pipeline('combine.wav')\n",
|
108 |
+
"end = time.time()"
|
|
|
109 |
]
|
110 |
},
|
111 |
{
|
112 |
"cell_type": "code",
|
113 |
+
"execution_count": 96,
|
114 |
+
"id": "1e0321b5",
|
115 |
"metadata": {},
|
116 |
"outputs": [
|
117 |
+
{
|
118 |
+
"name": "stdout",
|
119 |
+
"output_type": "stream",
|
120 |
+
"text": [
|
121 |
+
"Detected Language is Chinese\n"
|
122 |
+
]
|
123 |
+
},
|
124 |
{
|
125 |
"name": "stderr",
|
126 |
"output_type": "stream",
|
127 |
"text": [
|
128 |
+
"100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 1.28it/s]\n",
|
129 |
"It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
|
130 |
]
|
131 |
},
|
|
|
133 |
"name": "stdout",
|
134 |
"output_type": "stream",
|
135 |
"text": [
|
136 |
+
"δ½ εζ¬’ι₯ε\n",
|
137 |
+
"Detected language is English\n"
|
138 |
]
|
139 |
+
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
{
|
141 |
+
"name": "stderr",
|
142 |
+
"output_type": "stream",
|
143 |
+
"text": [
|
144 |
+
"It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
|
145 |
+
]
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"name": "stdout",
|
149 |
+
"output_type": "stream",
|
150 |
+
"text": [
|
151 |
+
"\n",
|
152 |
+
"Detected language is English\n"
|
153 |
+
]
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"name": "stderr",
|
157 |
+
"output_type": "stream",
|
158 |
+
"text": [
|
159 |
+
"It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
|
160 |
+
]
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"name": "stdout",
|
164 |
+
"output_type": "stream",
|
165 |
+
"text": [
|
166 |
+
"HE MOVED ABOUT\n",
|
167 |
+
"Detected language is English\n",
|
168 |
+
"INVISIBLE BUT EVERYONE COULD HEAR HIM\n"
|
169 |
+
]
|
170 |
}
|
171 |
],
|
172 |
"source": [
|
173 |
+
"from pydub import AudioSegment\n",
|
174 |
+
"from pydub.silence import split_on_silence\n",
|
175 |
+
"\n",
|
176 |
+
"sound_file = AudioSegment.from_wav(\"combine.wav\")\n",
|
177 |
+
"audio_chunks = split_on_silence(sound_file, \n",
|
178 |
+
" min_silence_len=100,\n",
|
179 |
+
" silence_thresh=-50\n",
|
180 |
+
")\n",
|
181 |
+
"\n",
|
182 |
+
"for i, chunk in enumerate(audio_chunks):\n",
|
183 |
+
"\n",
|
184 |
+
" out_file = \"./chunk{0}.wav\".format(i)\n",
|
185 |
+
" chunk.export(out_file, format=\"wav\")\n",
|
186 |
+
" pipeline(out_file)"
|
187 |
]
|
188 |
},
|
189 |
{
|
chunk0.wav
ADDED
Binary file (196 kB). View file
|
|
chunk1.wav
ADDED
Binary file (21.3 kB). View file
|
|
chunk2.wav
ADDED
Binary file (96.6 kB). View file
|
|
chunk3.wav
ADDED
Binary file (188 kB). View file
|
|
chunk4.wav
ADDED
Binary file (79.3 kB). View file
|
|
chunk5.wav
ADDED
Binary file (18.5 kB). View file
|
|
chunk6.wav
ADDED
Binary file (189 kB). View file
|
|
combine.wav
ADDED
Binary file (603 kB). View file
|
|
tmp/classifier.ckpt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/varun/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/d771b530cec097adc0088b4dbd173e242f895464/classifier.ckpt
|
tmp/embedding_model.ckpt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/varun/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/d771b530cec097adc0088b4dbd173e242f895464/embedding_model.ckpt
|
tmp/hyperparams.yaml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/varun/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/d771b530cec097adc0088b4dbd173e242f895464/hyperparams.yaml
|
tmp/label_encoder.ckpt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/varun/.cache/huggingface/hub/models--speechbrain--lang-id-voxlingua107-ecapa/snapshots/d771b530cec097adc0088b4dbd173e242f895464/label_encoder.txt
|