enhuiz committed on
Commit
693611a
·
0 Parent(s):
Files changed (7) hide show
  1. .gitignore +9 -0
  2. LICENSE +21 -0
  3. README.md +15 -0
  4. app.py +59 -0
  5. packages.txt +1 -0
  6. pyproject.toml +6 -0
  7. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ /data
2
+ /runs
3
+ /scripts
4
+ /dist
5
+ /build
6
+ /*.egg-info
7
+ /flagged
8
+ version.py
9
+ __pycache__
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Resemble AI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Resemble Enhance
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 4.8.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # Resemble Enhance
14
+
15
+ Resemble Enhance is an AI-powered tool that aims to improve the overall quality of speech by performing denoising and enhancement. It consists of two modules: a denoiser, which separates speech from noisy audio, and an enhancer, which further boosts the perceptual audio quality by repairing audio distortions and extending the audio bandwidth. The two models are trained on high-quality 44.1 kHz speech data to ensure high-quality enhancement of your speech.
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torchaudio
4
+
5
+ from resemble_enhance.enhancer.inference import denoise, enhance
6
+
7
# Select the inference device once at import time: use CUDA when PyTorch
# reports it as available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
11
+
12
+
13
def _fn(path, solver, nfe, tau, denoising):
    """Run the denoiser and the enhancer on one audio file for the demo UI.

    Args:
        path: Filesystem path of the input audio, or ``None`` when no file
            was supplied.
        solver: Name of the CFM ODE solver; it is lower-cased before being
            forwarded to ``enhance``.
        nfe: Number of function evaluations; coerced to ``int``.
        tau: CFM prior temperature, forwarded unchanged to ``enhance``.
        denoising: When truthy ``lambd`` is 0.9, otherwise 0.1.

    Returns:
        ``(None, None)`` if ``path`` is ``None``. Otherwise two
        ``(sample_rate, samples)`` tuples, the denoised result first and the
        enhanced result second, with ``samples`` converted to NumPy arrays.
    """
    # Nothing uploaded: leave both output widgets empty.
    if path is None:
        return None, None

    solver_name = solver.lower()
    steps = int(nfe)
    if denoising:
        lambd = 0.9
    else:
        lambd = 0.1

    waveform, sr = torchaudio.load(path)
    # Average over dim 0 -- presumably the channel axis (torchaudio's default
    # layout is channels-first), yielding a mono signal; confirm if the
    # loader options ever change.
    mono = waveform.mean(dim=0)

    # Both models receive the same mono input; the enhancer is NOT fed the
    # denoiser's output here.
    denoised, new_sr = denoise(mono, sr, device)
    enhanced, new_sr = enhance(mono, sr, device, nfe=steps, solver=solver_name, lambd=lambd, tau=tau)

    # Move results to host memory and convert to NumPy before returning them
    # as (sample_rate, samples) pairs.
    return (new_sr, denoised.cpu().numpy()), (new_sr, enhanced.cpu().numpy())
31
+
32
+
33
def main():
    """Assemble the Gradio interface around ``_fn`` and launch it."""
    # Input widgets, in the positional order expected by ``_fn``:
    # (path, solver, nfe, tau, denoising).
    audio_in = gr.Audio(type="filepath", label="Input Audio")
    solver_choice = gr.Dropdown(choices=["Midpoint", "RK4", "Euler"], value="Midpoint", label="CFM ODE Solver")
    nfe_slider = gr.Slider(minimum=1, maximum=128, value=64, step=1, label="CFM Number of Function Evaluations")
    tau_slider = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.01, label="CFM Prior Temperature")
    denoise_toggle = gr.Checkbox(value=False, label="Denoise Before Enhancement")

    # Output widgets, matching the two tuples returned by ``_fn``.
    denoised_out = gr.Audio(label="Output Denoised Audio")
    enhanced_out = gr.Audio(label="Output Enhanced Audio")

    demo = gr.Interface(
        fn=_fn,
        inputs=[audio_in, solver_choice, nfe_slider, tau_slider, denoise_toggle],
        outputs=[denoised_out, enhanced_out],
        title="Resemble Enhance",
        description="AI-driven audio enhancement for your audio files, powered by Resemble AI.",
    )
    demo.launch()
56
+
57
+
58
# Start the demo only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ libsox-dev
pyproject.toml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [tool.black]
2
+ line-length = 120
3
+ target-version = ['py310']
4
+
5
+ [tool.isort]
6
+ line_length = 120
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ resemble-enhance