naonauno committed on
Commit
01f44ce
·
verified ·
1 Parent(s): a228aa2

Upload 6 files

Browse files
Files changed (6) hide show
  1. app.py +36 -11
  2. discord-bot.py +112 -0
  3. docker-compose.yaml +9 -0
  4. dockerfile +24 -0
  5. requirements.txt +2 -1
  6. start.sh +7 -0
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import gradio as gr
3
- from elevenlabs import set_api_key, voices, generate, Voice, VoiceSettings
4
  import tempfile
5
  import speech_recognition as sr
6
  from pydub import AudioSegment
@@ -10,9 +10,25 @@ ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
10
  set_api_key(ELEVENLABS_API_KEY)
11
 
12
  def get_available_voices():
13
- """Fetch all available voices from ElevenLabs account"""
14
- available_voices = voices()
15
- return {voice.name: voice.voice_id for voice in available_voices}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  def text_to_speech(text, voice_name, stability, clarity, style):
18
  """Convert text to speech using selected voice and parameters"""
@@ -32,10 +48,14 @@ def text_to_speech(text, voice_name, stability, clarity, style):
32
  )
33
  )
34
 
 
 
 
 
35
  # Save audio to temporary file
36
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
37
  temp_file.write(audio)
38
- return temp_file.name
39
 
40
  def speech_to_text(audio_file):
41
  """Convert speech to text using speech recognition"""
@@ -61,11 +81,11 @@ def speech_to_speech(audio_file, voice_name, stability, clarity, style):
61
  # First convert speech to text
62
  text = speech_to_text(audio_file)
63
  if text.startswith("Error") or text.startswith("Could not"):
64
- return None, text
65
 
66
  # Then convert text to speech
67
- audio_output = text_to_speech(text, voice_name, stability, clarity, style)
68
- return audio_output, text
69
 
70
  # Get available voices
71
  VOICE_LIST = get_available_voices()
@@ -74,6 +94,10 @@ VOICE_LIST = get_available_voices()
74
  with gr.Blocks() as demo:
75
  gr.Markdown("# ElevenLabs Voice Generation")
76
 
 
 
 
 
77
  with gr.Tab("Text to Speech"):
78
  with gr.Row():
79
  with gr.Column():
@@ -89,17 +113,17 @@ with gr.Blocks() as demo:
89
 
90
  with gr.Column():
91
  audio_output = gr.Audio(label="Generated Audio")
 
92
 
93
  convert_btn.click(
94
  fn=text_to_speech,
95
  inputs=[text_input, voice_dropdown, stability, clarity, style],
96
- outputs=audio_output
97
  )
98
 
99
  with gr.Tab("Speech to Speech"):
100
  with gr.Row():
101
  with gr.Column():
102
- # Updated Audio component initialization
103
  audio_input = gr.Audio(label="Input Audio", sources=["microphone", "upload"])
104
  voice_dropdown_s2s = gr.Dropdown(choices=list(VOICE_LIST.keys()), label="Select Voice")
105
 
@@ -113,11 +137,12 @@ with gr.Blocks() as demo:
113
  with gr.Column():
114
  text_output = gr.Textbox(label="Recognized Text", lines=3)
115
  audio_output_s2s = gr.Audio(label="Generated Audio")
 
116
 
117
  convert_btn_s2s.click(
118
  fn=speech_to_speech,
119
  inputs=[audio_input, voice_dropdown_s2s, stability_s2s, clarity_s2s, style_s2s],
120
- outputs=[audio_output_s2s, text_output]
121
  )
122
 
123
  demo.launch()
 
1
  import os
2
  import gradio as gr
3
+ from elevenlabs import set_api_key, voices, generate, Voice, VoiceSettings, User
4
  import tempfile
5
  import speech_recognition as sr
6
  from pydub import AudioSegment
 
10
  set_api_key(ELEVENLABS_API_KEY)
11
 
12
  def get_available_voices():
13
+ """Fetch only custom voices from ElevenLabs account"""
14
+ all_voices = voices()
15
+ custom_voices = {voice.name: voice.voice_id for voice in all_voices if not voice.category == "premade"}
16
+ return custom_voices
17
+
18
def get_remaining_credits():
    """Fetch the account's character usage figures from ElevenLabs.

    Returns:
        dict: "character_count" and "character_limit", copied from the
        subscription of the user returned by ``User.from_api()``.

    NOTE(review): ``character_count`` appears to be the number of characters
    already used rather than the number left -- confirm against the
    ElevenLabs subscription API before presenting it as "remaining".
    """
    plan = User.from_api().subscription
    return dict(
        character_count=plan.character_count,
        character_limit=plan.character_limit,
    )
26
+
27
def format_credits_message(credits_info):
    """Format ElevenLabs character-quota information into a readable message.

    Args:
        credits_info: Mapping with two integer entries: "character_count"
            (characters consumed so far) and "character_limit" (the total
            character quota of the subscription).

    Returns:
        A string of the form
        "Credits: <remaining> / <total> characters remaining (<used> used)".
    """
    used = credits_info["character_count"]
    total = credits_info["character_limit"]
    # The API reports consumption, not what is left, so derive the remainder
    # here. Clamp at zero in case usage ever exceeds the quota.
    remaining = max(total - used, 0)
    return f"Credits: {remaining} / {total} characters remaining ({used} used)"
32
 
33
  def text_to_speech(text, voice_name, stability, clarity, style):
34
  """Convert text to speech using selected voice and parameters"""
 
48
  )
49
  )
50
 
51
+ # Get updated credits
52
+ credits_info = get_remaining_credits()
53
+ credits_message = format_credits_message(credits_info)
54
+
55
  # Save audio to temporary file
56
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
57
  temp_file.write(audio)
58
+ return temp_file.name, credits_message
59
 
60
  def speech_to_text(audio_file):
61
  """Convert speech to text using speech recognition"""
 
81
  # First convert speech to text
82
  text = speech_to_text(audio_file)
83
  if text.startswith("Error") or text.startswith("Could not"):
84
+ return None, text, ""
85
 
86
  # Then convert text to speech
87
+ audio_output, credits_message = text_to_speech(text, voice_name, stability, clarity, style)
88
+ return audio_output, text, credits_message
89
 
90
  # Get available voices
91
  VOICE_LIST = get_available_voices()
 
94
  with gr.Blocks() as demo:
95
  gr.Markdown("# ElevenLabs Voice Generation")
96
 
97
+ # Display current credits
98
+ credits_info = get_remaining_credits()
99
+ credits_display = gr.Markdown(format_credits_message(credits_info))
100
+
101
  with gr.Tab("Text to Speech"):
102
  with gr.Row():
103
  with gr.Column():
 
113
 
114
  with gr.Column():
115
  audio_output = gr.Audio(label="Generated Audio")
116
+ credits_output = gr.Markdown()
117
 
118
  convert_btn.click(
119
  fn=text_to_speech,
120
  inputs=[text_input, voice_dropdown, stability, clarity, style],
121
+ outputs=[audio_output, credits_output]
122
  )
123
 
124
  with gr.Tab("Speech to Speech"):
125
  with gr.Row():
126
  with gr.Column():
 
127
  audio_input = gr.Audio(label="Input Audio", sources=["microphone", "upload"])
128
  voice_dropdown_s2s = gr.Dropdown(choices=list(VOICE_LIST.keys()), label="Select Voice")
129
 
 
137
  with gr.Column():
138
  text_output = gr.Textbox(label="Recognized Text", lines=3)
139
  audio_output_s2s = gr.Audio(label="Generated Audio")
140
+ credits_output_s2s = gr.Markdown()
141
 
142
  convert_btn_s2s.click(
143
  fn=speech_to_speech,
144
  inputs=[audio_input, voice_dropdown_s2s, stability_s2s, clarity_s2s, style_s2s],
145
+ outputs=[audio_output_s2s, text_output, credits_output_s2s]
146
  )
147
 
148
  demo.launch()
discord-bot.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import discord
3
+ from discord import app_commands
4
+ from elevenlabs import set_api_key, voices, generate, Voice, VoiceSettings, User
5
+
6
+ # Set up Discord intents
7
+ intents = discord.Intents.default()
8
+ client = discord.Client(intents=intents)
9
+ tree = app_commands.CommandTree(client)
10
+
11
+ # Set your ElevenLabs API key
12
+ ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
13
+ set_api_key(ELEVENLABS_API_KEY)
14
+
15
def get_available_voices():
    """Fetch only custom voices from the ElevenLabs account.

    Returns:
        dict: Maps each voice's name to its voice_id, skipping every voice
        whose category is "premade".
    """
    return {
        voice.name: voice.voice_id
        for voice in voices()
        if voice.category != "premade"
    }
19
+
20
def get_remaining_credits():
    """Fetch the account's character usage figures from ElevenLabs.

    Returns:
        dict: "character_count" and "character_limit", copied from the
        subscription of the user returned by ``User.from_api()``.

    NOTE(review): ``character_count`` appears to be the number of characters
    already used rather than the number left -- confirm against the
    ElevenLabs subscription API before presenting it as "remaining".
    """
    plan = User.from_api().subscription
    return dict(
        character_count=plan.character_count,
        character_limit=plan.character_limit,
    )
28
+
29
def _format_credits(credits_info):
    """Return a "Credits remaining: <left> / <limit>" line for a reply."""
    limit = credits_info["character_limit"]
    # character_count is the amount consumed, so subtract it from the limit
    # (clamped at zero) to get what is actually left.
    remaining = max(limit - credits_info["character_count"], 0)
    return f"Credits remaining: {remaining} / {limit}"


@tree.command(name="voice", description="Voice generation commands")
@app_commands.describe(
    action="Action to perform (list or create)",
    text="Text to convert to speech",
    voice_name="Name of the voice to use",
    stability="Stability value (0-1)",
    clarity="Clarity value (0-1)",
    style="Style value (0-1)",
)
async def voice(
    interaction: discord.Interaction,
    action: str,
    text: str = None,
    voice_name: str = None,
    stability: float = 0.5,
    clarity: float = 0.75,
    style: float = 0.5,
):
    """Handle the /voice slash command.

    Args:
        interaction: The Discord interaction that invoked the command.
        action: "list" to show the custom voices, or "create" to generate
            speech; matched case-insensitively.
        text: Text to convert to speech (required for "create").
        voice_name: Name of the voice to use (required for "create").
        stability: Passed to VoiceSettings as ``stability``.
        clarity: Passed to VoiceSettings as ``similarity_boost``.
        style: Passed to VoiceSettings as ``style``.

    Every outcome is reported to the user through ``interaction.followup``.
    """
    # Function-scope imports keep this block self-contained; the module's
    # import section does not bring these names in.
    import asyncio
    import io

    await interaction.response.defer()
    requested_action = action.lower()

    if requested_action == "list":
        # The ElevenLabs helpers perform blocking HTTP requests, so run them
        # in a worker thread to keep the bot's event loop responsive.
        available_voices = await asyncio.to_thread(get_available_voices)
        voice_list = "\n".join(f"• {name}" for name in available_voices)
        credits_info = await asyncio.to_thread(get_remaining_credits)
        await interaction.followup.send(
            f"Available voices:\n{voice_list}\n{_format_credits(credits_info)}"
        )
        return

    if requested_action != "create":
        await interaction.followup.send("Invalid action. Use 'list' or 'create'.")
        return

    if not text or not voice_name:
        await interaction.followup.send("Please provide both text and voice name.")
        return

    available_voices = await asyncio.to_thread(get_available_voices)
    if voice_name not in available_voices:
        await interaction.followup.send(
            f"Voice '{voice_name}' not found. Use /voice list to see available voices."
        )
        return

    try:
        voice_settings = VoiceSettings(
            stability=stability,
            similarity_boost=clarity,
            style=style,
            use_speaker_boost=True,
        )

        audio = await asyncio.to_thread(
            generate,
            text=text,
            voice=Voice(
                voice_id=available_voices[voice_name],
                settings=voice_settings,
            ),
        )

        # Get updated credits
        credits_info = await asyncio.to_thread(get_remaining_credits)

        # Upload straight from memory. A fixed on-disk "temp.mp3" would be
        # shared by concurrent invocations and left behind if the send failed.
        await interaction.followup.send(
            f"Generated audio with voice '{voice_name}'\n{_format_credits(credits_info)}",
            file=discord.File(io.BytesIO(audio), filename="temp.mp3"),
        )
    except Exception as e:
        # Command boundary: surface any failure to the user instead of
        # leaving the deferred interaction unanswered.
        await interaction.followup.send(f"Error generating audio: {str(e)}")
104
+
105
# Set once the slash commands have been registered with Discord.
_commands_synced = False


@client.event
async def on_ready():
    """Sync slash commands on the first ready event and announce the login.

    Discord can dispatch ``on_ready`` more than once per process (for example
    after a reconnect), so the command sync is guarded to run a single time.
    """
    global _commands_synced
    if not _commands_synced:
        await tree.sync()
        _commands_synced = True
    print(f"Bot is ready and logged in as {client.user}")
109
+
110
# Run the bot
DISCORD_TOKEN = os.getenv("DISCORD_BOT_TOKEN")
if not DISCORD_TOKEN:
    # Fail fast with an actionable message instead of handing None to
    # client.run().
    raise SystemExit("DISCORD_BOT_TOKEN environment variable is not set.")
client.run(DISCORD_TOKEN)
docker-compose.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ version: '3'
2
+ services:
3
+ app:
4
+ build: .
5
+ ports:
6
+ - "7860:7860"
7
+ environment:
8
+ - GRADIO_SERVER_PORT=7860
9
+ - GRADIO_SERVER_NAME=0.0.0.0
dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ WORKDIR /code
4
+
5
+ # Install system dependencies
6
+ RUN apt-get update && apt-get install -y \
7
+ ffmpeg \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Copy requirements first to leverage Docker cache
11
+ COPY requirements.txt .
12
+ RUN pip install --no-cache-dir -r requirements.txt
13
+
14
+ # Copy your application
15
+ COPY . .
16
+
17
+ # Make start script executable
18
+ RUN chmod +x start.sh
19
+
20
+ # Expose the port Gradio will run on
21
+ EXPOSE 7860
22
+
23
+ # Run both applications
24
+ CMD ["./start.sh"]
requirements.txt CHANGED
@@ -3,4 +3,5 @@ elevenlabs==0.2.27
3
  SpeechRecognition==3.10.1
4
  pydub==0.25.1
5
  ffmpeg-python==0.2.0
6
- python-multipart==0.0.9
 
 
3
  SpeechRecognition==3.10.1
4
  pydub==0.25.1
5
  ffmpeg-python==0.2.0
6
+ python-multipart==0.0.9
7
+ discord.py==2.3.2
start.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Start the Gradio app in the background
python app.py &

# Start the Discord bot (the script in this commit is named discord-bot.py)
python discord-bot.py