Hev832 committed on
Commit
3925892
·
verified ·
1 Parent(s): cccb6a1

Create stream.py

Browse files
Files changed (1) hide show
  1. stream.py +141 -0
stream.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import random
4
+ from scipy.io.wavfile import write, read
5
+ import numpy as np
6
+ import yt_dlp
7
+ import subprocess
8
+ from pydub import AudioSegment
9
+ from audio_separator.separator import Separator
10
+ from lib.infer import infer_audio
11
+ import edge_tts
12
+ import tempfile
13
+ import anyio
14
+ from pathlib import Path
15
+ from lib.language_tts import language_dict
16
+ import zipfile
17
+ import shutil
18
+ import urllib.request
19
+ import gdown
20
+ import streamlit as st
21
+
22
+ main_dir = Path().resolve()
23
+ print(main_dir)
24
+ os.chdir(main_dir)
25
+ models_dir = "models"
26
+
27
+ # Download audio using yt-dlp
28
+ def download_audio(url):
29
+ ydl_opts = {
30
+ 'format': 'bestaudio/best',
31
+ 'outtmpl': 'ytdl/%(title)s.%(ext)s',
32
+ 'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav', 'preferredquality': '192'}],
33
+ }
34
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
35
+ info_dict = ydl.extract_info(url, download=True)
36
+ file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
37
+ sample_rate, audio_data = read(file_path)
38
+ audio_array = np.asarray(audio_data, dtype=np.int16)
39
+ return sample_rate, audio_array
40
+
41
+
42
+ def separate_audio(input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc):
43
+ if not os.path.exists(output_dir):
44
+ os.makedirs(output_dir)
45
+
46
+ separator = Separator(output_dir=output_dir)
47
+
48
+ vocals = os.path.join(output_dir, 'Vocals.wav')
49
+ instrumental = os.path.join(output_dir, 'Instrumental.wav')
50
+ vocals_reverb = os.path.join(output_dir, 'Vocals (Reverb).wav')
51
+ vocals_no_reverb = os.path.join(output_dir, 'Vocals (No Reverb).wav')
52
+ lead_vocals = os.path.join(output_dir, 'Lead Vocals.wav')
53
+ backing_vocals = os.path.join(output_dir, 'Backing Vocals.wav')
54
+
55
+ separator.load_model(model_filename=model_voc_inst)
56
+ voc_inst = separator.separate(input_audio)
57
+ os.rename(os.path.join(output_dir, voc_inst[0]), instrumental)
58
+ os.rename(os.path.join(output_dir, voc_inst[1]), vocals)
59
+
60
+ separator.load_model(model_filename=model_deecho)
61
+ voc_no_reverb = separator.separate(vocals)
62
+ os.rename(os.path.join(output_dir, voc_no_reverb[0]), vocals_no_reverb)
63
+ os.rename(os.path.join(output_dir, voc_no_reverb[1]), vocals_reverb)
64
+
65
+ separator.load_model(model_filename=model_back_voc)
66
+ backing_voc = separator.separate(vocals_no_reverb)
67
+ os.rename(os.path.join(output_dir, backing_voc[0]), backing_vocals)
68
+ os.rename(os.path.join(output_dir, backing_voc[1]), lead_vocals)
69
+
70
+ return instrumental, vocals, vocals_reverb, vocals_no_reverb, lead_vocals, backing_vocals
71
+
72
+
73
+ async def text_to_speech_edge(text, language_code):
74
+ voice = language_dict.get(language_code, "default_voice")
75
+ communicate = edge_tts.Communicate(text, voice)
76
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
77
+ tmp_path = tmp_file.name
78
+ await communicate.save(tmp_path)
79
+ return tmp_path
80
+
81
+
82
+ # Streamlit UI
83
+ st.title("Hex RVC")
84
+
85
+ tabs = st.tabs(["Inference", "Download RVC Model", "Audio Separation"])
86
+
87
+ # Inference Tab
88
+ with tabs[0]:
89
+ st.header("Inference")
90
+
91
+ model_name = st.text_input("Model Name", placeholder="Enter model name")
92
+ sound_path = st.text_input("Audio Path (Optional)", placeholder="Leave blank to upload audio")
93
+ uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])
94
+
95
+ if uploaded_audio is not None:
96
+ with open("uploaded_audio.wav", "wb") as f:
97
+ f.write(uploaded_audio.read())
98
+ sound_path = "uploaded_audio.wav"
99
+
100
+ f0_change = st.number_input("Pitch Change (semitones)", value=0)
101
+ f0_method = st.selectbox("F0 Method", ["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe+", "fcpe", "hybrid[rmvpe+fcpe]"], index=5)
102
+
103
+ if st.button("Run Inference"):
104
+ st.write("Running inference...")
105
+
106
+ # Download RVC Model Tab
107
+ with tabs[1]:
108
+ st.header("Download RVC Model")
109
+ url = st.text_input("Model URL")
110
+ dir_name = st.text_input("Model Name")
111
+
112
+ if st.button("Download Model"):
113
+ try:
114
+ download_online_model(url, dir_name)
115
+ st.success(f"Model {dir_name} downloaded successfully!")
116
+ except Exception as e:
117
+ st.error(str(e))
118
+
119
+ # Audio Separation Tab
120
+ with tabs[2]:
121
+ st.header("Audio Separation")
122
+ input_audio = st.file_uploader("Upload Audio for Separation", type=["wav", "mp3"])
123
+
124
+ if input_audio is not None:
125
+ with open("input_audio.wav", "wb") as f:
126
+ f.write(input_audio.read())
127
+ st.write("Audio uploaded successfully.")
128
+
129
+ if st.button("Separate Audio"):
130
+ st.write("Separating audio...")
131
+ output_dir = "./separated_audio"
132
+ inst, voc, voc_rev, voc_no_rev, lead_voc, back_voc = separate_audio("input_audio.wav", output_dir,
133
+ 'model_bs_roformer.ckpt',
134
+ 'UVR-DeEcho-DeReverb.pth',
135
+ 'mel_band_karaoke.ckpt')
136
+ st.audio(inst)
137
+ st.audio(voc)
138
+ st.audio(voc_rev)
139
+ st.audio(voc_no_rev)
140
+ st.audio(lead_voc)
141
+ st.audio(back_voc)