shangrilar committed on
Commit
a55438f
·
1 Parent(s): fc5c0c0

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +34 -0
  2. utils.py +256 -0
  3. voice.py +16 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import requests
import json
from utils import get_story, get_voice, get_voice_filename, get_music

# Three-tab demo UI: story continuation, speech synthesis and background music.
with gr.Blocks() as performance:
    # Tab 1: stream a story continuation into the chatbot widget.
    with gr.Tab("Story Generation"):
        chatbot = gr.Chatbot(label='GPT4', elem_id="chatbot")
        input_text = gr.Textbox(lines=2, label="μ‹œμž‘ λ¬Έμž₯")
        input_number = gr.Number(label='λ¬Έμž₯수')
        state = gr.State([])
        with gr.Row():
            with gr.Column():
                b1 = gr.Button().style(full_width=True)
        b1.click(get_story, [input_text, input_number, chatbot, state], [chatbot, state])

    # Tab 2: text-to-speech with a voice chosen by gender and age group.
    with gr.Tab("Voice Generation"):
        input_text = gr.Textbox(lines=10, label="λ¬Έμž₯")
        input_gender = gr.Radio(["남성", "μ—¬μ„±"], label="성별")
        input_age = gr.Radio(["어린이", "μ²­μ†Œλ…„", "μ²­λ…„", "쀑년"], label="μ—°λ ΉλŒ€")
        with gr.Row():
            with gr.Column():
                b2 = gr.Button().style(full_width=True)
        # The radios emit the localized labels above, while get_voice indexes
        # voice_dict by "female"/"male" and "child"/"teenager"/... keys.
        # get_voice_filename performs that translation, so it is the handler
        # that must be wired here (calling get_voice directly raises KeyError).
        b2.click(get_voice_filename, [input_text, input_gender, input_age], [gr.Audio(label="κ²°κ³Ό μŒμ„± 파일", type="filepath")])

    # Tab 3: background music generated from the text.
    with gr.Tab("Music Generation"):
        input_text = gr.Textbox(lines=10, label="λ¬Έμž₯")
        input_duration = gr.Number(label="μŒμ•… μ‹œκ°„")
        with gr.Row():
            with gr.Column():
                b3 = gr.Button().style(full_width=True)
        b3.click(get_music, [input_text, input_duration], gr.Audio(label="κ²°κ³Ό μŒμ•… 파일", type='filepath'))

# Queueing is enabled before launch; at most 5 requests may wait in the queue.
performance.queue(max_size=5).launch()
utils.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---
2
+ # jupyter:
3
+ # jupytext:
4
+ # formats: ipynb,py:light
5
+ # text_representation:
6
+ # extension: .py
7
+ # format_name: light
8
+ # format_version: '1.5'
9
+ # jupytext_version: 1.14.1
10
+ # kernelspec:
11
+ # display_name: Python 3 (ipykernel)
12
+ # language: python
13
+ # name: python3
14
+ # ---
15
+
16
# SECURITY: API credentials are read from environment variables only.
# Literal secret values must never be committed to source control; any key
# that has ever appeared in this file's history should be treated as
# compromised and rotated with the issuing service.

# +
import os
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
import json
import random
import requests
from voice import voice_dict

# Each value is None when the corresponding environment variable is unset.
OPENAPI_KEY = os.getenv('OPENAPI_KEY')
CLOVA_VOICE_Client_ID = os.getenv('CLOVA_VOICE_Client_ID')
CLOVA_VOICE_Client_Secret = os.getenv('CLOVA_VOICE_Client_Secret')
PAPAGO_Translate_Client_ID = os.getenv('PAPAGO_Translate_Client_ID')
PAPAGO_Translate_Client_Secret = os.getenv('PAPAGO_Translate_Client_Secret')
mubert_pat = os.getenv('mubert_pat')
SUMMARY_Client_ID = os.getenv('SUMMARY_Client_ID')
SUMMARY_Client_Secret = os.getenv('SUMMARY_Client_Secret')
43
+
44
+
45
def get_story(first_sentence:str, num_sentences:int):
    """Ask the OpenAI chat completions endpoint to continue a story.

    NOTE: a second ``get_story`` defined further down in this module rebinds
    the name, so importers receive that later (streaming) version, not this one.

    Args:
        first_sentence: Opening sentence of the novel, embedded in the prompt.
        num_sentences: Number of follow-up sentences requested in the prompt.

    Returns:
        The ``content`` of the first choice's message in the JSON response.
    """
    # The second line of the prompt deliberately starts at column 0 so that no
    # indentation whitespace leaks into the text sent to the model.
    prompt = f"""I will provide the first sentence of the novel, and please write {num_sentences} sentences continuing the story in a first-person protagonist's perspective in Korean. Don't number the sentences.
\n\nFirst sentence: {first_sentence}"""
    request_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {OPENAPI_KEY}"}
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "system", "content": "You are a helpful assistant."},
                     {"role": "user", "content": prompt}]
    }
    response = requests.post("https://api.openai.com/v1/chat/completions",
                             headers=request_headers,
                             data=json.dumps(payload))
    first_choice = response.json()['choices'][0]
    return first_choice['message']['content']
55
+
56
def get_voice(input_text:str, gender:str="female", age_group:str="youth", filename="voice.mp3"):
    """Synthesize ``input_text`` to an mp3 file via the CLOVA Voice TTS endpoint.

    Args:
        input_text: Text to be spoken.
        gender: Key into ``voice_dict``: "female" or "male".
        age_group: Key into ``voice_dict[gender]``: "child", "teenager",
            "youth" or "middle_aged".
        filename: Path the returned audio bytes are written to.

    Returns:
        ``filename`` when the request succeeded and the file was written,
        otherwise ``None`` (so callers never receive the path of a file that
        is missing or left over from an earlier call).

    Raises:
        KeyError: If ``gender`` or ``age_group`` is not a key of ``voice_dict``.
    """
    # Ignore blank speaker ids so random.choice cannot select an invalid voice.
    candidates = [name for name in voice_dict[gender][age_group] if name]
    speaker = random.choice(candidates)
    data = {"speaker":speaker, "text":input_text}
    url = "https://naveropenapi.apigw.ntruss.com/tts-premium/v1/tts"
    headers = {
        "X-NCP-APIGW-API-KEY-ID": CLOVA_VOICE_Client_ID,
        "X-NCP-APIGW-API-KEY": CLOVA_VOICE_Client_Secret,
    }
    response = requests.post(url, headers=headers, data=data)

    if response.status_code != 200:
        print("Error Code: " + str(response.status_code))
        try:
            error_detail = str(response.json())
        except ValueError:
            # The error body is not guaranteed to be JSON; log it verbatim.
            error_detail = response.text
        print("Error Message: " + error_detail)
        return None

    print("TTS mp3 μ €μž₯")
    with open(filename, 'wb') as f:
        f.write(response.content)
    return filename
78
+
79
def translate_text(text:str):
    """Translate Korean ``text`` to English through the Papago NMT endpoint.

    Args:
        text: Source text; it is URL-quoted before being sent as form data.

    Returns:
        The ``translatedText`` field of the response on success. If the
        server answers with an HTTP error, the string ``"Error Code: <code>"``
        is returned instead of raising, so callers must not assume the result
        is always a translation.
    """
    encText = urllib.parse.quote(text)
    data = f"source=ko&target=en&text={encText}"
    url = "https://naveropenapi.apigw.ntruss.com/nmt/v1/translation"

    request = urllib.request.Request(url)
    request.add_header("X-NCP-APIGW-API-KEY-ID", PAPAGO_Translate_Client_ID)
    request.add_header("X-NCP-APIGW-API-KEY", PAPAGO_Translate_Client_Secret)

    try:
        # The context manager closes the HTTP response once the body is read.
        with urllib.request.urlopen(request, data=data.encode("utf-8")) as response:
            response_body = response.read()
    except urllib.error.HTTPError as e:
        return f"Error Code: {e.code}"
    return json.loads(response_body.decode('utf-8'))['message']['result']['translatedText']
95
+
96
+
97
+ # -
98
+
99
def get_summary(input_text:str, summary_count:int = 5):
    """Summarize Korean text with the text-summary endpoint.

    Args:
        input_text: Text to summarize; only its first 2000 characters are
            used, and surrounding whitespace is stripped after truncation.
        summary_count: Value sent as ``summaryCount`` in the request options.

    Returns:
        The ``summary`` field of the response with newlines replaced by
        single spaces, or ``None`` when the HTTP status is not 200 (the status
        code and decoded error body are printed in that case).
    """
    content = input_text[:2000].strip()

    endpoint = "https://naveropenapi.apigw.ntruss.com/text-summary/v1/summarize"
    request_headers = {
        "X-NCP-APIGW-API-KEY-ID": SUMMARY_Client_ID,
        "X-NCP-APIGW-API-KEY": SUMMARY_Client_Secret,
        "Content-Type": "application/json"
    }
    payload = {
        "document": {"content": content},
        "option": {
            "language": "ko",
            "model": "general",
            "tone": "0",
            "summaryCount": summary_count
        }
    }

    response = requests.post(endpoint, headers=request_headers, data=json.dumps(payload))
    if response.status_code != 200:
        print("Error Code: " + str(response.status_code))
        print("Error Message: " + str(response.json()))
        return None

    # The summary arrives one sentence per line; flatten it to a single line.
    return response.json()['summary'].replace('\n', ' ')
127
+
128
+
129
def get_music(text, duration=300):
    """Generate a music track for ``text`` and download it as an mp3 file.

    The text is summarized (``get_summary``), translated to English
    (``translate_text``), cut to 200 characters and sent to the Mubert
    ``TTMRecordTrack`` endpoint. ``TrackStatus`` is then polled until the
    first listed task reports ``Done`` and the track is downloaded.

    Args:
        text: Source text used to derive the music prompt.
        duration: Value sent as the ``duration`` request parameter.

    Returns:
        ``"mubert_music.mp3"`` once the file has been written, or ``None``
        when any step fails: no summary, track request rejected, generation
        not finished within the polling budget, or download unsuccessful.
    """
    # Both waits are bounded so a stuck task or a permanently failing download
    # cannot block the worker forever.
    max_status_polls = 150        # 2 s apart -> roughly 5 minutes
    max_download_attempts = 30    # 1 s apart -> roughly 30 seconds

    print('original text length: ', len(text))
    summary = get_summary(text, 3)
    if summary is None:
        # get_summary returns None when the summary API reports an error.
        print("Summary request failed; music generation aborted.")
        return None
    print('summary text length: ', len(summary))
    translated_text = translate_text(summary)
    print('translated_text length: ', len(translated_text))
    # The prompt is limited to its first 200 characters.
    translated_text = translated_text[:200]

    r = requests.post('https://api-b2b.mubert.com/v2/TTMRecordTrack',
                      json={
                          "method":"TTMRecordTrack",
                          "params":
                          {
                              "text":translated_text,
                              "pat":mubert_pat,
                              "mode":"track",
                              "duration":duration,
                              "bitrate":128
                          }
                      })

    rdata = json.loads(r.text)
    if rdata.get('status') != 1:
        # Without a successful response there is no download link to poll.
        print("Mubert TTMRecordTrack failed: ", rdata)
        return None
    url = rdata['data']['tasks'][0]['download_link']

    # Wait for the generation task to finish.
    for _ in range(max_status_polls):
        r = requests.post('https://api-b2b.mubert.com/v2/TrackStatus',
                          json={
                              "method":"TrackStatus",
                              "params":
                              {
                                  "pat":mubert_pat
                              }
                          })
        if r.json()['data']['tasks'][0]['task_status_text'] == 'Done':
            break
        time.sleep(2)
    else:
        print("Mubert track was not ready before the polling limit was reached.")
        return None

    local_filename = "mubert_music.mp3"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Retry the download a limited number of times, then report the last status.
    response = None
    for _ in range(max_download_attempts):
        response = requests.get(url, stream=True, headers=headers)
        if response.status_code == 200:
            break
        response.close()
        time.sleep(1)

    if response.status_code == 404:
        print("파일이 μ‘΄μž¬ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
        return None
    elif response.status_code != 200:
        print(f"파일 λ‹€μš΄λ‘œλ“œμ— μ‹€νŒ¨ν•˜μ˜€μŠ΅λ‹ˆλ‹€. μ—λŸ¬ μ½”λ“œ: {response.status_code}")
        return None

    # Stream the body to disk in chunks instead of loading it into memory.
    with open(local_filename, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
    print(f"{local_filename} 파일이 μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
    return local_filename
197
+
198
+
199
def get_story(first_sentence:str, num_sentences:int, chatbot=None, history=None):
    """Stream a story continuation from the OpenAI chat completions endpoint.

    This is a generator: after every streamed chunk that carries text it
    yields the conversation rebuilt from ``history`` together with the
    updated ``history`` list and the underlying response object.

    Args:
        first_sentence: Opening sentence; appended to ``history`` and embedded
            in the prompt.
        num_sentences: Number of follow-up sentences requested in the prompt.
        chatbot: Accepted for signature compatibility with the UI wiring; not
            read by this function.
        history: Flat list alternating user inputs and model replies. It is
            mutated in place. A new list is created when omitted.

    Yields:
        ``(chat, history, response)`` where ``chat`` is a list of
        ``(user_text, reply_text)`` tuples built from consecutive pairs of
        ``history``.
    """
    # A literal [] default would be shared between calls, so every call made
    # without an explicit history would keep growing the same list.
    if history is None:
        history = []
    history.append(first_sentence)

    # The second line of the prompt deliberately starts at column 0 so that no
    # indentation whitespace leaks into the text sent to the model.
    prompt = f"""I will provide the first sentence of the novel, and please write {num_sentences} sentences continuing the story in a first-person protagonist's perspective in Korean. Don't number the sentences.
\n\nFirst sentence: {first_sentence}"""

    # stream=True both in the payload (server sends incremental chunks) and in
    # requests (the body is consumed lazily through iter_lines).
    response = requests.post("https://api.openai.com/v1/chat/completions",
                             headers={"Content-Type": "application/json", "Authorization": f"Bearer {OPENAPI_KEY}"},
                             stream=True,
                             data=json.dumps({
                                 "stream": True,
                                 "model": "gpt-3.5-turbo",
                                 "messages": [{"role": "system", "content": "You are a helpful assistant."},
                                              {"role": "user", "content": prompt}]
                             }))

    token_counter = 0
    partial_words = ""
    for line_index, raw_line in enumerate(response.iter_lines()):
        # The first line of the stream is skipped unconditionally.
        if line_index == 0:
            continue
        line = raw_line.decode()
        # Blank separator lines and short lines (12 characters or fewer, which
        # covers "data: [DONE]") carry no JSON payload worth parsing.
        if len(line) <= 12:
            continue
        # Drop the 6-character "data: " prefix and parse the chunk once.
        delta = json.loads(line[6:])['choices'][0]['delta']
        if "content" not in delta:
            continue

        partial_words = partial_words + delta["content"]
        if token_counter == 0:
            # First piece of the reply opens a new history entry ...
            history.append(" " + partial_words)
        else:
            # ... later pieces overwrite it with the text accumulated so far.
            history[-1] = partial_words
        chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
        token_counter += 1
        yield chat, history, response
234
+
235
+
236
def get_voice_filename(text, gender, age):
    """Map the UI's localized gender/age labels onto ``get_voice`` arguments.

    Args:
        text: Text to synthesize.
        gender: Label from the UI; one specific label selects the male
            voices, any other value selects the female voices.
        age: Label from the UI naming one of the four age groups.

    Returns:
        Whatever ``get_voice`` returns for the mapped arguments, or ``None``
        when ``age`` is not one of the four known labels (``get_voice`` is
        not called in that case).
    """
    voice_gender = "male" if gender == '남성' else "female"

    # (UI label, voice_dict age-group key) pairs, checked in order.
    age_groups = (
        ("어린이", "child"),
        ("μ²­μ†Œλ…„", "teenager"),
        ("μ²­λ…„", "youth"),
        ("쀑년", "middle_aged"),
    )
    for label, age_group in age_groups:
        if age == label:
            return get_voice(text, gender=voice_gender, age_group=age_group, filename="voice.mp3")
    return None
voice.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Speaker ids grouped by gender and age group.
# Layout: voice_dict[gender][age_group] -> list of speaker id strings, where
# gender is "female" or "male" and age_group is "child", "teenager", "youth"
# or "middle_aged". Every entry must be a non-empty id, because callers pick
# one at random and send it to the TTS service as the "speaker" field.
voice_dict = {
    "female": {
        "child": ["ndain", "ngaram", "nmeow", "vdain"],
        "teenager": ["nminseo", "nbora", "nihyun", "njiwon"],
        "youth": [
            "nara", "vara", "nminyoung", "nyuna", "vyuna", "vhyeri",
            "nes_c_hyeri", "ngoeun", "ntiffany", "nnarae", "njangj", "nyejin",
            "njiyun", "nsujin", "nes_c_sohyun", "noyj", "neunseo", "nheera",
            "nyoungmi", "nyeji", "nsabina", "nyounghwa", "nshasha",
        ],
        "middle_aged": [
            "mijin", "neunyoung", "vmikyung", "nsunkyung", "nyujin", "nsunhee",
            "nes_c_mikyung", "nminjeong", "nkyunglee", "napple",
        ],
    },
    "male": {
        "child": ["nwoof", "nhajun"],
        "teenager": ["njonghyun", "njoonyoung", "njaewook"],
        "youth": [
            "jinho", "nminsang", "njinho", "njihun", "njihwan", "nseonghoon",
            "nsiyoon", "ntaejin", "njooahn", "nian", "vian", "vdonghyun",
            "ndonghyun", "nsangdo", "neunwoo", "nraewon", "nreview", "nmovie",
        ],
        "middle_aged": [
            "nseungpyo", "nkyungtae", "nwontak", "nwoosik", "nyoungil",
            "nes_c_kihyo", "nkitae", "nkyuwon",
        ],
    },
}
+ }