Spaces:
Paused
Paused
Upload 6 files
Browse files- app.py +36 -11
- discord-bot.py +112 -0
- docker-compose.yaml +9 -0
- dockerfile +24 -0
- requirements.txt +2 -1
- start.sh +7 -0
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
from elevenlabs import set_api_key, voices, generate, Voice, VoiceSettings
|
4 |
import tempfile
|
5 |
import speech_recognition as sr
|
6 |
from pydub import AudioSegment
|
@@ -10,9 +10,25 @@ ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
|
|
10 |
set_api_key(ELEVENLABS_API_KEY)
|
11 |
|
12 |
def get_available_voices():
|
13 |
-
"""Fetch
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
def text_to_speech(text, voice_name, stability, clarity, style):
|
18 |
"""Convert text to speech using selected voice and parameters"""
|
@@ -32,10 +48,14 @@ def text_to_speech(text, voice_name, stability, clarity, style):
|
|
32 |
)
|
33 |
)
|
34 |
|
|
|
|
|
|
|
|
|
35 |
# Save audio to temporary file
|
36 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
|
37 |
temp_file.write(audio)
|
38 |
-
return temp_file.name
|
39 |
|
40 |
def speech_to_text(audio_file):
|
41 |
"""Convert speech to text using speech recognition"""
|
@@ -61,11 +81,11 @@ def speech_to_speech(audio_file, voice_name, stability, clarity, style):
|
|
61 |
# First convert speech to text
|
62 |
text = speech_to_text(audio_file)
|
63 |
if text.startswith("Error") or text.startswith("Could not"):
|
64 |
-
return None, text
|
65 |
|
66 |
# Then convert text to speech
|
67 |
-
audio_output = text_to_speech(text, voice_name, stability, clarity, style)
|
68 |
-
return audio_output, text
|
69 |
|
70 |
# Get available voices
|
71 |
VOICE_LIST = get_available_voices()
|
@@ -74,6 +94,10 @@ VOICE_LIST = get_available_voices()
|
|
74 |
with gr.Blocks() as demo:
|
75 |
gr.Markdown("# ElevenLabs Voice Generation")
|
76 |
|
|
|
|
|
|
|
|
|
77 |
with gr.Tab("Text to Speech"):
|
78 |
with gr.Row():
|
79 |
with gr.Column():
|
@@ -89,17 +113,17 @@ with gr.Blocks() as demo:
|
|
89 |
|
90 |
with gr.Column():
|
91 |
audio_output = gr.Audio(label="Generated Audio")
|
|
|
92 |
|
93 |
convert_btn.click(
|
94 |
fn=text_to_speech,
|
95 |
inputs=[text_input, voice_dropdown, stability, clarity, style],
|
96 |
-
outputs=audio_output
|
97 |
)
|
98 |
|
99 |
with gr.Tab("Speech to Speech"):
|
100 |
with gr.Row():
|
101 |
with gr.Column():
|
102 |
-
# Updated Audio component initialization
|
103 |
audio_input = gr.Audio(label="Input Audio", sources=["microphone", "upload"])
|
104 |
voice_dropdown_s2s = gr.Dropdown(choices=list(VOICE_LIST.keys()), label="Select Voice")
|
105 |
|
@@ -113,11 +137,12 @@ with gr.Blocks() as demo:
|
|
113 |
with gr.Column():
|
114 |
text_output = gr.Textbox(label="Recognized Text", lines=3)
|
115 |
audio_output_s2s = gr.Audio(label="Generated Audio")
|
|
|
116 |
|
117 |
convert_btn_s2s.click(
|
118 |
fn=speech_to_speech,
|
119 |
inputs=[audio_input, voice_dropdown_s2s, stability_s2s, clarity_s2s, style_s2s],
|
120 |
-
outputs=[audio_output_s2s, text_output]
|
121 |
)
|
122 |
|
123 |
demo.launch()
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
+
from elevenlabs import set_api_key, voices, generate, Voice, VoiceSettings, User
|
4 |
import tempfile
|
5 |
import speech_recognition as sr
|
6 |
from pydub import AudioSegment
|
|
|
10 |
set_api_key(ELEVENLABS_API_KEY)
|
11 |
|
12 |
def get_available_voices():
|
13 |
+
"""Fetch only custom voices from ElevenLabs account"""
|
14 |
+
all_voices = voices()
|
15 |
+
custom_voices = {voice.name: voice.voice_id for voice in all_voices if not voice.category == "premade"}
|
16 |
+
return custom_voices
|
17 |
+
|
18 |
+
def get_remaining_credits():
|
19 |
+
"""Get remaining character credits from ElevenLabs"""
|
20 |
+
user = User.from_api()
|
21 |
+
subscription = user.subscription
|
22 |
+
return {
|
23 |
+
"character_count": subscription.character_count,
|
24 |
+
"character_limit": subscription.character_limit
|
25 |
+
}
|
26 |
+
|
27 |
+
def format_credits_message(credits_info):
|
28 |
+
"""Format credits information into a readable message"""
|
29 |
+
used = credits_info["character_count"]
|
30 |
+
total = credits_info["character_limit"]
|
31 |
+
return f"Credits: {total - used} / {total} characters remaining ({used} used)"
|
32 |
|
33 |
def text_to_speech(text, voice_name, stability, clarity, style):
|
34 |
"""Convert text to speech using selected voice and parameters"""
|
|
|
48 |
)
|
49 |
)
|
50 |
|
51 |
+
# Get updated credits
|
52 |
+
credits_info = get_remaining_credits()
|
53 |
+
credits_message = format_credits_message(credits_info)
|
54 |
+
|
55 |
# Save audio to temporary file
|
56 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
|
57 |
temp_file.write(audio)
|
58 |
+
return temp_file.name, credits_message
|
59 |
|
60 |
def speech_to_text(audio_file):
|
61 |
"""Convert speech to text using speech recognition"""
|
|
|
81 |
# First convert speech to text
|
82 |
text = speech_to_text(audio_file)
|
83 |
if text.startswith("Error") or text.startswith("Could not"):
|
84 |
+
return None, text, ""
|
85 |
|
86 |
# Then convert text to speech
|
87 |
+
audio_output, credits_message = text_to_speech(text, voice_name, stability, clarity, style)
|
88 |
+
return audio_output, text, credits_message
|
89 |
|
90 |
# Get available voices
|
91 |
VOICE_LIST = get_available_voices()
|
|
|
94 |
with gr.Blocks() as demo:
|
95 |
gr.Markdown("# ElevenLabs Voice Generation")
|
96 |
|
97 |
+
# Display current credits
|
98 |
+
credits_info = get_remaining_credits()
|
99 |
+
credits_display = gr.Markdown(format_credits_message(credits_info))
|
100 |
+
|
101 |
with gr.Tab("Text to Speech"):
|
102 |
with gr.Row():
|
103 |
with gr.Column():
|
|
|
113 |
|
114 |
with gr.Column():
|
115 |
audio_output = gr.Audio(label="Generated Audio")
|
116 |
+
credits_output = gr.Markdown()
|
117 |
|
118 |
convert_btn.click(
|
119 |
fn=text_to_speech,
|
120 |
inputs=[text_input, voice_dropdown, stability, clarity, style],
|
121 |
+
outputs=[audio_output, credits_output]
|
122 |
)
|
123 |
|
124 |
with gr.Tab("Speech to Speech"):
|
125 |
with gr.Row():
|
126 |
with gr.Column():
|
|
|
127 |
audio_input = gr.Audio(label="Input Audio", sources=["microphone", "upload"])
|
128 |
voice_dropdown_s2s = gr.Dropdown(choices=list(VOICE_LIST.keys()), label="Select Voice")
|
129 |
|
|
|
137 |
with gr.Column():
|
138 |
text_output = gr.Textbox(label="Recognized Text", lines=3)
|
139 |
audio_output_s2s = gr.Audio(label="Generated Audio")
|
140 |
+
credits_output_s2s = gr.Markdown()
|
141 |
|
142 |
convert_btn_s2s.click(
|
143 |
fn=speech_to_speech,
|
144 |
inputs=[audio_input, voice_dropdown_s2s, stability_s2s, clarity_s2s, style_s2s],
|
145 |
+
outputs=[audio_output_s2s, text_output, credits_output_s2s]
|
146 |
)
|
147 |
|
148 |
demo.launch()
|
discord-bot.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import discord
|
3 |
+
from discord import app_commands
|
4 |
+
from elevenlabs import set_api_key, voices, generate, Voice, VoiceSettings, User
|
5 |
+
|
6 |
+
# Set up Discord intents
|
7 |
+
intents = discord.Intents.default()
|
8 |
+
client = discord.Client(intents=intents)
|
9 |
+
tree = app_commands.CommandTree(client)
|
10 |
+
|
11 |
+
# Set your ElevenLabs API key
|
12 |
+
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
|
13 |
+
set_api_key(ELEVENLABS_API_KEY)
|
14 |
+
|
15 |
+
def get_available_voices():
|
16 |
+
"""Fetch only custom voices from ElevenLabs account"""
|
17 |
+
all_voices = voices()
|
18 |
+
return {voice.name: voice.voice_id for voice in all_voices if not voice.category == "premade"}
|
19 |
+
|
20 |
+
def get_remaining_credits():
|
21 |
+
"""Get remaining character credits from ElevenLabs"""
|
22 |
+
user = User.from_api()
|
23 |
+
subscription = user.subscription
|
24 |
+
return {
|
25 |
+
"character_count": subscription.character_count,
|
26 |
+
"character_limit": subscription.character_limit
|
27 |
+
}
|
28 |
+
|
29 |
+
@tree.command(name="voice", description="Voice generation commands")
|
30 |
+
@app_commands.describe(
|
31 |
+
action="Action to perform (list or create)",
|
32 |
+
text="Text to convert to speech",
|
33 |
+
voice_name="Name of the voice to use",
|
34 |
+
stability="Stability value (0-1)",
|
35 |
+
clarity="Clarity value (0-1)",
|
36 |
+
style="Style value (0-1)"
|
37 |
+
)
|
38 |
+
async def voice(
|
39 |
+
interaction: discord.Interaction,
|
40 |
+
action: str,
|
41 |
+
text: str = None,
|
42 |
+
voice_name: str = None,
|
43 |
+
stability: float = 0.5,
|
44 |
+
clarity: float = 0.75,
|
45 |
+
style: float = 0.5
|
46 |
+
):
|
47 |
+
await interaction.response.defer()
|
48 |
+
|
49 |
+
if action.lower() == "list":
|
50 |
+
available_voices = get_available_voices()
|
51 |
+
voice_list = "\n".join([f"• {name}" for name in available_voices.keys()])
|
52 |
+
credits_info = get_remaining_credits()
|
53 |
+
credits_msg = f"\nCredits remaining: {credits_info['character_limit'] - credits_info['character_count']} / {credits_info['character_limit']}"
|
54 |
+
|
55 |
+
await interaction.followup.send(f"Available voices:\n{voice_list}{credits_msg}")
|
56 |
+
|
57 |
+
elif action.lower() == "create":
|
58 |
+
if not all([text, voice_name]):
|
59 |
+
await interaction.followup.send("Please provide both text and voice name.")
|
60 |
+
return
|
61 |
+
|
62 |
+
available_voices = get_available_voices()
|
63 |
+
if voice_name not in available_voices:
|
64 |
+
await interaction.followup.send(f"Voice '{voice_name}' not found. Use /voice list to see available voices.")
|
65 |
+
return
|
66 |
+
|
67 |
+
try:
|
68 |
+
voice_settings = VoiceSettings(
|
69 |
+
stability=stability,
|
70 |
+
similarity_boost=clarity,
|
71 |
+
style=style,
|
72 |
+
use_speaker_boost=True
|
73 |
+
)
|
74 |
+
|
75 |
+
audio = generate(
|
76 |
+
text=text,
|
77 |
+
voice=Voice(
|
78 |
+
voice_id=available_voices[voice_name],
|
79 |
+
settings=voice_settings
|
80 |
+
)
|
81 |
+
)
|
82 |
+
|
83 |
+
# Save audio to temporary file
|
84 |
+
with open("temp.mp3", "wb") as f:
|
85 |
+
f.write(audio)
|
86 |
+
|
87 |
+
# Get updated credits
|
88 |
+
credits_info = get_remaining_credits()
|
89 |
+
credits_msg = f"Credits remaining: {credits_info['character_limit'] - credits_info['character_count']} / {credits_info['character_limit']}"
|
90 |
+
|
91 |
+
await interaction.followup.send(
|
92 |
+
f"Generated audio with voice '{voice_name}'\n{credits_msg}",
|
93 |
+
file=discord.File("temp.mp3")
|
94 |
+
)
|
95 |
+
|
96 |
+
# Clean up
|
97 |
+
os.remove("temp.mp3")
|
98 |
+
|
99 |
+
except Exception as e:
|
100 |
+
await interaction.followup.send(f"Error generating audio: {str(e)}")
|
101 |
+
|
102 |
+
else:
|
103 |
+
await interaction.followup.send("Invalid action. Use 'list' or 'create'.")
|
104 |
+
|
105 |
+
@client.event
|
106 |
+
async def on_ready():
|
107 |
+
await tree.sync()
|
108 |
+
print(f"Bot is ready and logged in as {client.user}")
|
109 |
+
|
110 |
+
# Run the bot
|
111 |
+
DISCORD_TOKEN = os.getenv("DISCORD_BOT_TOKEN")
|
112 |
+
client.run(DISCORD_TOKEN)
|
docker-compose.yaml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: '3'
|
2 |
+
services:
|
3 |
+
app:
|
4 |
+
build: .
|
5 |
+
ports:
|
6 |
+
- "7860:7860"
|
7 |
+
environment:
|
8 |
+
- GRADIO_SERVER_PORT=7860
|
9 |
+
- GRADIO_SERVER_NAME=0.0.0.0
- ELEVENLABS_API_KEY=${ELEVENLABS_API_KEY}
- DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN}
|
dockerfile
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10
|
2 |
+
|
3 |
+
WORKDIR /code
|
4 |
+
|
5 |
+
# Install system dependencies
|
6 |
+
RUN apt-get update && apt-get install -y \
|
7 |
+
ffmpeg \
|
8 |
+
&& rm -rf /var/lib/apt/lists/*
|
9 |
+
|
10 |
+
# Copy requirements first to leverage Docker cache
|
11 |
+
COPY requirements.txt .
|
12 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
13 |
+
|
14 |
+
# Copy your application
|
15 |
+
COPY . .
|
16 |
+
|
17 |
+
# Make start script executable
|
18 |
+
RUN chmod +x start.sh
|
19 |
+
|
20 |
+
# Expose the port Gradio will run on
|
21 |
+
EXPOSE 7860
|
22 |
+
|
23 |
+
# Run both applications
|
24 |
+
CMD ["./start.sh"]
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ elevenlabs==0.2.27
|
|
3 |
SpeechRecognition==3.10.1
|
4 |
pydub==0.25.1
|
5 |
ffmpeg-python==0.2.0
|
6 |
-
python-multipart==0.0.9
|
|
|
|
3 |
SpeechRecognition==3.10.1
|
4 |
pydub==0.25.1
|
5 |
ffmpeg-python==0.2.0
|
6 |
+
python-multipart==0.0.9
|
7 |
+
discord.py==2.3.2
|
start.sh
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
# Start the Gradio app in the background
|
4 |
+
python app.py &
|
5 |
+
|
6 |
+
# Start the Discord bot
|
7 |
+
python discord-bot.py
|