ginipick committed on
Commit
cc10093
·
verified ·
1 Parent(s): 5325327

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -735
app.py CHANGED
@@ -1,736 +1,2 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
  import os
4
- import pandas as pd
5
- from typing import List, Dict, Tuple
6
- import json
7
- import io
8
- import traceback
9
- import csv
10
- from openai import OpenAI
11
- from functools import lru_cache
12
- from concurrent.futures import ThreadPoolExecutor
13
- import math
14
-
15
- # CSS 설정
16
- css = """
17
- footer {
18
- visibility: hidden;
19
- }
20
- #chatbot-container, #chatbot-data-upload {
21
- height: 700px;
22
- overflow-y: scroll;
23
- }
24
- #chatbot-container .message, #chatbot-data-upload .message {
25
- font-size: 14px;
26
- }
27
- /* 입력창 배경색 및 글자색 변경 */
28
- textarea, input[type="text"] {
29
- background-color: #ffffff;
30
- color: #000000;
31
- }
32
- /* 파일 업로드 영역 높이 조절 */
33
- #parquet-upload-area {
34
- max-height: 150px;
35
- overflow-y: auto;
36
- }
37
- /* 초기 설명 글씨 크기 조절 */
38
- #initial-description {
39
- font-size: 14px;
40
- }
41
- /* API Key 입력 섹션 스타일 */
42
- .api-key-section {
43
- margin: 10px 0;
44
- padding: 10px;
45
- border: 1px solid #ddd;
46
- border-radius: 5px;
47
- }
48
- .api-key-status {
49
- margin-top: 5px;
50
- font-weight: bold;
51
- }
52
- """
53
-
54
- # 추론 API 클라이언트 설정
55
- hf_client = InferenceClient(
56
- "CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN")
57
- )
58
-
59
- def load_code(filename: str) -> str:
60
- try:
61
- with open(filename, 'r', encoding='utf-8') as file:
62
- return file.read()
63
- except FileNotFoundError:
64
- return f"{filename} ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
65
- except Exception as e:
66
- return f"ํŒŒ์ผ์„ ์ฝ๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
67
-
68
- def load_parquet(filename: str) -> str:
69
- try:
70
- df = pd.read_parquet(filename, engine='pyarrow')
71
- return df.head(10).to_markdown(index=False)
72
- except FileNotFoundError:
73
- return f"{filename} ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
74
- except Exception as e:
75
- return f"ํŒŒ์ผ์„ ์ฝ๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
76
-
77
- def clean_response(text: str) -> str:
78
- """์‘๋‹ต ํ…์ŠคํŠธ ์ •์ œ ํ•จ์ˆ˜"""
79
- sentences = [s.strip() for s in text.split('.') if s.strip()]
80
- unique_sentences = []
81
- seen = set()
82
-
83
- for sentence in sentences:
84
- normalized = ' '.join(sentence.lower().split())
85
- if normalized not in seen:
86
- seen.add(normalized)
87
- unique_sentences.append(sentence)
88
-
89
- cleaned_text = '. '.join(unique_sentences)
90
- if cleaned_text and not cleaned_text.endswith('.'):
91
- cleaned_text += '.'
92
-
93
- return cleaned_text
94
-
95
- def remove_duplicates(text: str) -> str:
96
- """์ค‘๋ณต ๋ฌธ์žฅ ์ œ๊ฑฐ ํ•จ์ˆ˜"""
97
- sentences = text.split('.')
98
- unique_sentences = []
99
- seen = set()
100
-
101
- for sentence in sentences:
102
- sentence = sentence.strip()
103
- if sentence and sentence not in seen:
104
- seen.add(sentence)
105
- unique_sentences.append(sentence)
106
-
107
- return '. '.join(unique_sentences)
108
-
109
- def upload_csv(file_path: str) -> Tuple[str, str]:
110
- try:
111
- df = pd.read_csv(file_path, sep=',')
112
- required_columns = {'id', 'text', 'label', 'metadata'}
113
- available_columns = set(df.columns)
114
- missing_columns = required_columns - available_columns
115
- if missing_columns:
116
- return f"CSV ํŒŒ์ผ์— ๋‹ค์Œ ํ•„์ˆ˜ ์ปฌ๋Ÿผ์ด ๋ˆ„๋ฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค: {', '.join(missing_columns)}", ""
117
-
118
- df.drop_duplicates(inplace=True)
119
- df.fillna('', inplace=True)
120
- df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})
121
-
122
- parquet_filename = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
123
- df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
124
- return f"{parquet_filename} ํŒŒ์ผ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์—…๋กœ๋“œ๋˜๊ณ  ๋ณ€ํ™˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", parquet_filename
125
- except Exception as e:
126
- return f"CSV ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฐ ๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}", ""
127
-
128
- def upload_parquet(file_path: str) -> Tuple[str, str, str]:
129
- try:
130
- df = pd.read_parquet(file_path, engine='pyarrow')
131
-
132
- data_info = {
133
- "์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜": len(df),
134
- "์ปฌ๋Ÿผ ๋ชฉ๋ก": list(df.columns),
135
- "๋ฐ์ดํ„ฐ ํƒ€์ž…": df.dtypes.to_dict(),
136
- "๊ฒฐ์ธก์น˜ ์ •๋ณด": df.isnull().sum().to_dict()
137
- }
138
-
139
- summary = []
140
- summary.append(f"### ๋ฐ์ดํ„ฐ์…‹ ๊ธฐ๋ณธ ์ •๋ณด:")
141
- summary.append(f"- ์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜: {data_info['์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜']}")
142
- summary.append(f"- ์ปฌ๋Ÿผ ๋ชฉ๋ก: {', '.join(data_info['์ปฌ๋Ÿผ ๋ชฉ๋ก'])}")
143
-
144
- summary.append("\n### ์ปฌ๋Ÿผ๋ณ„ ์ •๋ณด:")
145
- for col in df.columns:
146
- if df[col].dtype in ['int64', 'float64']:
147
- stats = df[col].describe()
148
- summary.append(f"\n{col} (์ˆ˜์น˜ํ˜•):")
149
- summary.append(f"- ํ‰๊ท : {stats['mean']:.2f}")
150
- summary.append(f"- ์ตœ์†Œ: {stats['min']}")
151
- summary.append(f"- ์ตœ๋Œ€: {stats['max']}")
152
- elif df[col].dtype == 'object' or df[col].dtype == 'string':
153
- unique_count = df[col].nunique()
154
- summary.append(f"\n{col} (ํ…์ŠคํŠธ):")
155
- summary.append(f"- ๊ณ ์œ ๊ฐ’ ์ˆ˜: {unique_count}")
156
- if unique_count < 10:
157
- value_counts = df[col].value_counts().head(5)
158
- summary.append("- ์ƒ์œ„ 5๊ฐœ ๊ฐ’:")
159
- for val, count in value_counts.items():
160
- summary.append(f" โ€ข {val}: {count}๊ฐœ")
161
-
162
- preview = df.head(10).to_markdown(index=False)
163
- summary.append("\n### ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:")
164
- summary.append(preview)
165
-
166
- parquet_content = "\n".join(summary)
167
- parquet_json = df.to_json(orient='records', force_ascii=False)
168
-
169
- return "Parquet ํŒŒ์ผ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์—…๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", parquet_content, parquet_json
170
- except Exception as e:
171
- return f"Parquet ํŒŒ์ผ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}", "", ""
172
-
173
- def text_to_parquet(text: str) -> Tuple[str, str, str]:
174
- try:
175
- lines = [line.strip() for line in text.split('\n') if line.strip()]
176
- data = []
177
-
178
- for line in lines:
179
- try:
180
- import re
181
- pattern = r'(\d+),([^,]+),([^,]+),(.+)'
182
- match = re.match(pattern, line)
183
-
184
- if match:
185
- id_val, text_val, label_val, metadata_val = match.groups()
186
- text_val = text_val.strip().strip('"')
187
- label_val = label_val.strip().strip('"')
188
- metadata_val = metadata_val.strip().strip('"')
189
-
190
- data.append({
191
- 'id': int(id_val),
192
- 'text': text_val,
193
- 'label': label_val,
194
- 'metadata': metadata_val
195
- })
196
- except Exception as e:
197
- print(f"๋ผ์ธ ํŒŒ์‹ฑ ์˜ค๋ฅ˜: {line}\n{str(e)}")
198
- continue
199
-
200
- if not data:
201
- return "๋ณ€ํ™˜ํ•  ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", "", ""
202
-
203
- df = pd.DataFrame(data)
204
- df = df.astype({
205
- 'id': 'int32',
206
- 'text': 'string',
207
- 'label': 'string',
208
- 'metadata': 'string'
209
- })
210
-
211
- parquet_filename = 'text_to_parquet.parquet'
212
- df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
213
- preview = df.to_markdown(index=False)
214
-
215
- return (
216
- f"{parquet_filename} ํŒŒ์ผ์ด ์„ฑ๊ณต์ ์œผ๋กœ ๋ณ€ํ™˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ์ด {len(df)}๊ฐœ์˜ ๋ ˆ์ฝ”๋“œ๊ฐ€ ์ฒ˜๋ฆฌ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
217
- preview,
218
- parquet_filename
219
- )
220
-
221
- except Exception as e:
222
- error_message = f"ํ…์ŠคํŠธ ๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
223
- print(f"{error_message}\n{traceback.format_exc()}")
224
- return error_message, "", ""
225
-
226
- def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None, api_key: str = None) -> str:
227
- if not api_key:
228
- yield "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
229
- return
230
-
231
- # OpenAI 클라이언트 초기화
232
- client = OpenAI(api_key=api_key)
233
-
234
- system_prefix = """๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ๋‹ต๋ณ€ํ•  ๊ฒƒ. ๋„ˆ๋Š” ์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•˜๋Š” ์—ญํ• ์„ ํ•œ๋‹ค.
235
-
236
- ์ฃผ์š” ์ง€์นจ:
237
- 1. ์งˆ๋ฌธ๊ณผ ์ง์ ‘ ๊ด€๋ จ๋œ ๋‚ด์šฉ๋งŒ ๊ฐ„๋‹จ๋ช…๋ฃŒํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•  ๊ฒƒ
238
- 2. ์ด์ „ ๋‹ต๋ณ€๊ณผ ์ค‘๋ณต๋˜๋Š” ๋‚ด์šฉ์€ ์ œ์™ธํ•  ๊ฒƒ
239
- 3. ๋ถˆํ•„์š”ํ•œ ์˜ˆ์‹œ๋‚˜ ๋ถ€์—ฐ ์„ค๋ช…์€ ํ•˜์ง€ ๋ง ๊ฒƒ
240
- 4. ๋™์ผํ•œ ๋‚ด์šฉ์„ ๋‹ค๋ฅธ ํ‘œํ˜„์œผ๋กœ ๋ฐ˜๋ณตํ•˜์ง€ ๋ง ๊ฒƒ
241
- 5. ํ•ต์‹ฌ ์ •๋ณด๋งŒ ์ „๋‹ฌํ•  ๊ฒƒ
242
- """
243
-
244
- if parquet_data:
245
- try:
246
- df = pd.read_json(io.StringIO(parquet_data))
247
- data_summary = df.describe(include='all').to_string()
248
- system_prefix += f"\n\n๋ฐ์ดํ„ฐ ์š”์•ฝ:\n{data_summary}"
249
- except Exception as e:
250
- print(f"๋ฐ์ดํ„ฐ ๋กœ๋“œ ์˜ค๋ฅ˜: {str(e)}")
251
-
252
- messages = [{"role": "system", "content": system_prefix}]
253
- recent_history = history[-3:] if history else []
254
- for chat in recent_history:
255
- messages.append({"role": chat["role"], "content": chat["content"]})
256
-
257
- messages.append({"role": "user", "content": message})
258
-
259
- try:
260
- response = client.chat.completions.create(
261
- model="gpt-4o-mini",
262
- messages=messages,
263
- max_tokens=max_tokens,
264
- temperature=temperature,
265
- top_p=top_p,
266
- stream=True
267
- )
268
-
269
- full_response = ""
270
- for chunk in response:
271
- if chunk.choices[0].delta.content:
272
- full_response += chunk.choices[0].delta.content
273
- yield clean_response(full_response)
274
-
275
- except Exception as e:
276
- error_message = f"์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
277
- print(f"{error_message}\n{traceback.format_exc()}")
278
- yield error_message
279
-
280
- def preprocess_text_with_llm(input_text: str, api_key: str = None) -> str:
281
- if not api_key:
282
- return "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
283
-
284
- # OpenAI 클라이언트 초기화
285
- client = OpenAI(api_key=api_key)
286
-
287
- system_prompt = """๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)๋กœ ๋‹ต๋ณ€ํ•˜์‹œ์˜ค. ๋‹น์‹ ์€ ๋ฐ์ดํ„ฐ ์ „์ฒ˜๋ฆฌ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์ž…๋ ฅ๋œ ํ…์ŠคํŠธ๋ฅผ CSV ๋ฐ์ดํ„ฐ์…‹ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜์„ธ์š”.
288
-
289
- ๊ทœ์น™:
290
- 1. ์ถœ๋ ฅ ํ˜•์‹: id,text,label,metadata
291
- 2. id: 1๋ถ€ํ„ฐ ์‹œ์ž‘ํ•˜๋Š” ์ˆœ์ฐจ์  ๋ฒˆํ˜ธ
292
- 3. text: ์˜๋ฏธ ์žˆ๋Š” ๋‹จ์œ„๋กœ ๋ถ„๋ฆฌ๋œ ํ…์ŠคํŠธ
293
- 4. label: ํ…์ŠคํŠธ์˜ ์ฃผ์ œ๋‚˜ ์นดํ…Œ๊ณ ๋ฆฌ๋ฅผ ์•„๋ž˜ ๊ธฐ์ค€์œผ๋กœ ์ •ํ™•ํ•˜๊ฒŒ ํ•œ ๊ฐœ๋งŒ ์„ ํƒ
294
- - Historical_Figure (์—ญ์‚ฌ์  ์ธ๋ฌผ)
295
- - Military_History (๊ตฐ์‚ฌ ์—ญ์‚ฌ)
296
- - Technology (๊ธฐ์ˆ )
297
- - Politics (์ •์น˜)
298
- - Culture (๋ฌธํ™”)
299
- 5. metadata: ๋‚ ์งœ, ์ถœ์ฒ˜ ๋“ฑ ์ถ”๊ฐ€ ์ •๋ณด"""
300
-
301
- try:
302
- response = client.chat.completions.create(
303
- model="gpt-4-0125-preview",
304
- messages=[
305
- {"role": "system", "content": system_prompt},
306
- {"role": "user", "content": input_text}
307
- ],
308
- max_tokens=4000,
309
- temperature=0.1,
310
- stream=True
311
- )
312
-
313
- full_response = ""
314
- for chunk in response:
315
- if chunk.choices[0].delta.content:
316
- full_response += chunk.choices[0].delta.content
317
-
318
- processed_text = clean_response(full_response)
319
-
320
- try:
321
- from io import StringIO
322
- import csv
323
- csv.reader(StringIO(processed_text))
324
- return processed_text
325
- except csv.Error:
326
- return "LLM์ด ์˜ฌ๋ฐ”๋ฅธ CSV ํ˜•์‹์„ ์ƒ์„ฑํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."
327
-
328
- except Exception as e:
329
- error_message = f"์ „์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
330
- print(error_message)
331
- return error_message
332
-
333
-
334
- # Gradio Blocks 인터페이스 설정
335
- with gr.Blocks(css=css) as demo:
336
- api_key_state = gr.State("") # API ํ‚ค๋ฅผ ์ €์žฅํ•  State ์ถ”๊ฐ€
337
-
338
- gr.Markdown("# MyEzRAG: LLM์ด ๋‚˜๋งŒ์˜ ๋ฐ์ดํ„ฐ๋กœ ํ•™์Šตํ•œ ์ฝ˜ํ…์ธ  ์ƒ์„ฑ/๋‹ต๋ณ€", elem_id="initial-description")
339
-
340
- # API ํ‚ค ์ž…๋ ฅ ์„น์…˜ ์ถ”๊ฐ€
341
- with gr.Row(elem_classes="api-key-section"):
342
- with gr.Column(scale=3):
343
- api_key_input = gr.Textbox(
344
- label="OpenAI API Key",
345
- placeholder="sk-...",
346
- type="password",
347
- show_label=True
348
- )
349
- with gr.Column(scale=1):
350
- api_key_button = gr.Button("API Key ์„ค์ •", variant="primary")
351
-
352
- # API ํ‚ค ์ƒํƒœ ํ‘œ์‹œ
353
- api_key_status = gr.Markdown("โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.", elem_classes="api-key-status")
354
-
355
- # API ํ‚ค ์„ค์ • ํ•จ์ˆ˜
356
- def set_api_key(api_key: str):
357
- if not api_key.strip():
358
- return "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.", ""
359
- if not api_key.startswith("sk-"):
360
- return "โŒ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์€ API Key ํ˜•์‹์ž…๋‹ˆ๋‹ค. ๋‹ค์‹œ ํ™•์ธํ•ด์ฃผ์„ธ์š”.", ""
361
- return "โœ… API Key๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์„ค์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", api_key
362
-
363
- # API ํ‚ค ์„ค์ • ์ด๋ฒคํŠธ ์—ฐ๊ฒฐ
364
- api_key_button.click(
365
- set_api_key,
366
- inputs=[api_key_input],
367
- outputs=[api_key_status, api_key_state]
368
- )
369
-
370
- gr.Markdown(
371
- "### '์‚ฌ์šฉ ๋ฐฉ๋ฒ•' ํƒญ์„ ํ†ตํ•ด ์ž์„ธํ•œ ์ด์šฉ ๋ฐฉ๋ฒ•์„ ์ฐธ๊ณ ํ•˜์„ธ์š”.\n"
372
- "### Tip) '์˜ˆ์ œ'๋ฅผ ํ†ตํ•ด ๋‹ค์–‘ํ•œ ํ™œ์šฉ ๋ฐฉ๋ฒ•์„ ์ฒดํ—˜ํ•˜๊ณ  ์‘์šฉํ•ด ๋ณด์„ธ์š”, ๋ฐ์ดํ„ฐ์…‹ ์—…๋กœ๋“œ์‹œ ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋Š” 10๊ฑด๋งŒ ์ถœ๋ ฅ",
373
- elem_id="initial-description"
374
- )
375
-
376
- # ์ฒซ ๋ฒˆ์งธ ํƒญ: My ๋ฐ์ดํ„ฐ์…‹+LLM
377
- with gr.Tab("My ๋ฐ์ดํ„ฐ์…‹+LLM"):
378
- gr.Markdown("### LLM๊ณผ ๋Œ€ํ™”ํ•˜๊ธฐ")
379
- chatbot_data_upload = gr.Chatbot(label="์ฑ—๋ด‡", type="messages", elem_id="chatbot-data-upload")
380
- msg_data_upload = gr.Textbox(label="๋ฉ”์‹œ์ง€ ์ž…๋ ฅ", placeholder="์—ฌ๊ธฐ์— ๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...")
381
- send_data_upload = gr.Button("์ „์†ก")
382
-
383
- with gr.Accordion("์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๋ฐ ์˜ต์…˜ ์„ค์ •", open=False):
384
- system_message = gr.Textbox(label="System Message", value="๋„ˆ๋Š” AI ์กฐ์–ธ์ž ์—ญํ• ์ด๋‹ค.")
385
- max_tokens = gr.Slider(minimum=1, maximum=8000, value=1000, label="Max Tokens")
386
- temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
387
- top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
388
-
389
- parquet_data_state = gr.State()
390
-
391
- def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str, api_key: str):
392
- if not api_key:
393
- history = history or []
394
- history.append({"role": "assistant", "content": "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."})
395
- yield history, ""
396
- return
397
-
398
- history = history or []
399
- recent_questions = [chat['content'].strip().lower() for chat in history[-3:] if chat['role'] == 'user']
400
- if message.strip().lower() in recent_questions:
401
- yield history + [{"role": "assistant", "content": "๋™์ผํ•œ ์งˆ๋ฌธ์ด ์ตœ๊ทผ์— ์žˆ์—ˆ์Šต๋‹ˆ๋‹ค. ๋‹ค๋ฅธ ์งˆ๋ฌธ์„ ํ•ด์ฃผ์„ธ์š”."}], ""
402
- return
403
-
404
- try:
405
- history.append({"role": "user", "content": message})
406
- response_gen = respond(
407
- message,
408
- history,
409
- system_message,
410
- max_tokens,
411
- temperature=0.3,
412
- top_p=top_p,
413
- parquet_data=parquet_data,
414
- api_key=api_key
415
- )
416
-
417
- partial_response = ""
418
- for partial in response_gen:
419
- partial_response = partial
420
- display_history = history + [{"role": "assistant", "content": partial_response}]
421
- yield display_history, ""
422
-
423
- history.append({"role": "assistant", "content": partial_response})
424
- except Exception as e:
425
- response = f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
426
- history.append({"role": "assistant", "content": response})
427
- yield history, ""
428
-
429
- send_data_upload.click(
430
- handle_message_data_upload,
431
- inputs=[
432
- msg_data_upload,
433
- chatbot_data_upload,
434
- system_message,
435
- max_tokens,
436
- temperature,
437
- top_p,
438
- parquet_data_state,
439
- api_key_state,
440
- ],
441
- outputs=[chatbot_data_upload, msg_data_upload],
442
- queue=True
443
- )
444
-
445
- # ์˜ˆ์ œ ์ถ”๊ฐ€
446
- with gr.Accordion("์˜ˆ์ œ", open=False):
447
- gr.Examples(
448
- examples=[
449
- ["์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹์— ๋Œ€ํ•ด ์š”์•ฝ ์„ค๋ช…ํ•˜๋ผ."],
450
- ["์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹ ํŒŒ์ผ์„ ํ•™์Šต ๋ฐ์ดํ„ฐ๋กœ ํ™œ์šฉํ•˜์—ฌ, ๋ณธ ์„œ๋น„์Šค๋ฅผ SEO ์ตœ์ ํ™”ํ•˜์—ฌ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŠธ(๊ฐœ์š”, ๋ฐฐ๊ฒฝ ๋ฐ ํ•„์š”์„ฑ, ๊ธฐ์กด ์œ ์‚ฌ ์ œํ’ˆ/์„œ๋น„์Šค์™€ ๋น„๊ตํ•˜์—ฌ ํŠน์žฅ์ , ํ™œ์šฉ์ฒ˜, ๊ฐ€์น˜, ๊ธฐ๋Œ€ํšจ๊ณผ, ๊ฒฐ๋ก ์„ ํฌํ•จ)๋กœ 4000 ํ† ํฐ ์ด์ƒ ์ž‘์„ฑํ•˜๋ผ"],
451
- ["์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹ ํŒŒ์ผ์„ ํ•™์Šต ๋ฐ์ดํ„ฐ๋กœ ํ™œ์šฉํ•˜์—ฌ, ์‚ฌ์šฉ ๋ฐฉ๋ฒ•๊ณผ ์ฐจ๋ณ„์ , ํŠน์ง•, ๊ฐ•์ ์„ ์ค‘์‹ฌ์œผ๋กœ 4000 ํ† ํฐ ์ด์ƒ ์œ ํŠœ๋ธŒ ์˜์ƒ ์Šคํฌ๋ฆฝํŠธ ํ˜•ํƒœ๋กœ ์ž‘์„ฑํ•˜๋ผ"],
452
- ["์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹ ํŒŒ์ผ์„ ํ•™์Šต ๋ฐ์ดํ„ฐ๋กœ ํ™œ์šฉํ•˜์—ฌ, ์ œํ’ˆ ์ƒ์„ธ ํŽ˜์ด์ง€ ํ˜•์‹์˜ ๋‚ด์šฉ์„ 4000 ํ† ํฐ ์ด์ƒ ์ž์„ธํžˆ ์„ค๋ช…ํ•˜๋ผ"],
453
- ["์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹ ํŒŒ์ผ์„ ํ•™์Šต ๋ฐ์ดํ„ฐ๋กœ ํ™œ์šฉํ•˜์—ฌ, FAQ 20๊ฑด์„ ์ƒ์„ธํ•˜๊ฒŒ ์ž‘์„ฑํ•˜๋ผ. 4000ํ† ํฐ ์ด์ƒ ์‚ฌ์šฉํ•˜๋ผ."],
454
- ["์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹ ํŒŒ์ผ์„ ํ•™์Šต ๋ฐ์ดํ„ฐ๋กœ ํ™œ์šฉํ•˜์—ฌ, ํŠนํ—ˆ ์ถœ์›์— ํ™œ์šฉํ•  ๊ธฐ์ˆ  ๋ฐ ๋น„์ฆˆ๋‹ˆ์Šค ๋ชจ๋ธ ์ธก๋ฉด์„ ํฌํ•จํ•˜์—ฌ ํŠนํ—ˆ ์ถœ์›์„œ ๊ตฌ์„ฑ์— ๋งž๊ฒŒ ํ˜์‹ ์ ์ธ ์ฐฝ์˜ ๋ฐœ๋ช… ๋‚ด์šฉ์„ ์ค‘์‹ฌ์œผ๋กœ 4000 ํ† ํฐ ์ด์ƒ ์ž‘์„ฑํ•˜๋ผ."],
455
- ],
456
- inputs=msg_data_upload,
457
- label="์˜ˆ์ œ ์„ ํƒ",
458
- )
459
-
460
- # Parquet ํŒŒ์ผ ์—…๋กœ๋“œ
461
- gr.Markdown("### Parquet ํŒŒ์ผ ์—…๋กœ๋“œ")
462
- with gr.Row():
463
- with gr.Column():
464
- parquet_upload = gr.File(
465
- label="Parquet ํŒŒ์ผ ์—…๋กœ๋“œ", type="filepath", elem_id="parquet-upload-area"
466
- )
467
- parquet_upload_button = gr.Button("์—…๋กœ๋“œ")
468
- parquet_upload_status = gr.Textbox(label="์—…๋กœ๋“œ ์ƒํƒœ", interactive=False)
469
- parquet_preview_chat = gr.Markdown(label="Parquet ํŒŒ์ผ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
470
-
471
- def handle_parquet_upload(file_path: str):
472
- message, parquet_content, parquet_json = upload_parquet(file_path)
473
- if parquet_json:
474
- return message, parquet_content, parquet_json
475
- else:
476
- return message, "", ""
477
-
478
- parquet_upload_button.click(
479
- handle_parquet_upload,
480
- inputs=parquet_upload,
481
- outputs=[parquet_upload_status, parquet_preview_chat, parquet_data_state]
482
- )
483
-
484
- # ๋‘ ๋ฒˆ์งธ ํƒญ: CSV to My ๋ฐ์ดํ„ฐ์…‹
485
- with gr.Tab("CSV to My ๋ฐ์ดํ„ฐ์…‹"):
486
- gr.Markdown("### CSV ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฐ Parquet ๋ณ€ํ™˜")
487
- with gr.Row():
488
- with gr.Column():
489
- csv_file = gr.File(label="CSV ํŒŒ์ผ ์—…๋กœ๋“œ", type="filepath")
490
- upload_button = gr.Button("์—…๋กœ๋“œ ๋ฐ ๋ณ€ํ™˜")
491
- upload_status = gr.Textbox(label="์—…๋กœ๋“œ ์ƒํƒœ", interactive=False)
492
- parquet_preview = gr.Markdown(label="Parquet ํŒŒ์ผ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
493
- download_button = gr.File(label="Parquet ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ", interactive=False)
494
-
495
- def handle_csv_upload(file_path: str):
496
- message, parquet_filename = upload_csv(file_path)
497
- if parquet_filename:
498
- parquet_content = load_parquet(parquet_filename)
499
- return message, parquet_content, parquet_filename
500
- else:
501
- return message, "", None
502
-
503
- upload_button.click(
504
- handle_csv_upload,
505
- inputs=csv_file,
506
- outputs=[upload_status, parquet_preview, download_button]
507
- )
508
-
509
- # ์„ธ ๋ฒˆ์งธ ํƒญ: Text to My ๋ฐ์ดํ„ฐ์…‹
510
- with gr.Tab("Text to My ๋ฐ์ดํ„ฐ์…‹"):
511
- gr.Markdown("### ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด CSV๋กœ ๋ณ€ํ™˜ ํ›„ Parquet์œผ๋กœ ์ž๋™ ์ „ํ™˜๋ฉ๋‹ˆ๋‹ค.")
512
- with gr.Row():
513
- with gr.Column():
514
- text_input = gr.Textbox(
515
- label="ํ…์ŠคํŠธ ์ž…๋ ฅ (๊ฐ ํ–‰์€ `id,text,label,metadata` ํ˜•์‹์œผ๋กœ ์ž…๋ ฅ)",
516
- lines=10,
517
- placeholder='์˜ˆ: 1,"์ด์ˆœ์‹ ","์žฅ๊ตฐ","๊ฑฐ๋ถ์„ "\n2,"์›๊ท ","์žฅ๊ตฐ","๋ชจํ•จ"\n3,"์„ ์กฐ","์™•","์‹œ๊ธฐ"\n4,"๋„์š”ํ† ๋ฏธ ํžˆ๋ฐ์š”์‹œ","์™•","์นจ๋žต"'
518
- )
519
- convert_button = gr.Button("๋ณ€ํ™˜ ๋ฐ ๋‹ค์šด๋กœ๋“œ")
520
- convert_status = gr.Textbox(label="๋ณ€ํ™˜ ์ƒํƒœ", interactive=False)
521
- parquet_preview_convert = gr.Markdown(label="Parquet ํŒŒ์ผ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
522
- download_parquet_convert = gr.File(label="Parquet ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ", interactive=False)
523
-
524
- def handle_text_to_parquet(text: str):
525
- message, parquet_content, parquet_filename = text_to_parquet(text)
526
- if parquet_filename:
527
- return message, parquet_content, parquet_filename
528
- else:
529
- return message, "", None
530
-
531
- convert_button.click(
532
- handle_text_to_parquet,
533
- inputs=text_input,
534
- outputs=[convert_status, parquet_preview_convert, download_parquet_convert]
535
- )
536
-
537
- # ๋„ค ๋ฒˆ์งธ ํƒญ: Text Preprocessing with LLM
538
- with gr.Tab("Text Preprocessing with LLM"):
539
- gr.Markdown("### ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด LLM์ด ๋ฐ์ดํ„ฐ์…‹ ํ˜•์‹์— ๋งž๊ฒŒ ์ „์ฒ˜๋ฆฌํ•˜์—ฌ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
540
- with gr.Row():
541
- with gr.Column():
542
- raw_text_input = gr.Textbox(
543
- label="ํ…์ŠคํŠธ ์ž…๋ ฅ",
544
- lines=15,
545
- placeholder="์—ฌ๊ธฐ์— ์ „์ฒ˜๋ฆฌํ•  ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”..."
546
- )
547
-
548
- with gr.Row():
549
- preprocess_button = gr.Button("์ „์ฒ˜๋ฆฌ ์‹คํ–‰", variant="primary")
550
- clear_button = gr.Button("์ดˆ๊ธฐํ™”")
551
-
552
- preprocess_status = gr.Textbox(
553
- label="์ „์ฒ˜๋ฆฌ ์ƒํƒœ",
554
- interactive=False,
555
- value="๋Œ€๊ธฐ ์ค‘..."
556
- )
557
-
558
- processed_text_output = gr.Textbox(
559
- label="์ „์ฒ˜๋ฆฌ๋œ ๋ฐ์ดํ„ฐ์…‹ ์ถœ๋ ฅ",
560
- lines=15,
561
- interactive=False
562
- )
563
-
564
- convert_to_parquet_button = gr.Button("Parquet์œผ๋กœ ๋ณ€ํ™˜")
565
- download_parquet = gr.File(label="๋ณ€ํ™˜๋œ Parquet ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ")
566
-
567
- def handle_text_preprocessing(input_text: str, api_key: str):
568
- if not api_key:
569
- yield "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.", ""
570
- return
571
-
572
- if not input_text.strip():
573
- yield "์ž…๋ ฅ ํ…์ŠคํŠธ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", ""
574
- return
575
-
576
- try:
577
- yield "์ „์ฒ˜๋ฆฌ๋ฅผ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค...", ""
578
- processed_text = preprocess_text_with_llm(input_text, api_key)
579
-
580
- if processed_text:
581
- yield "์ „์ฒ˜๋ฆฌ๊ฐ€ ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", processed_text
582
- else:
583
- yield "์ „์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", ""
584
-
585
- except Exception as e:
586
- yield f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}", ""
587
-
588
- def clear_inputs():
589
- return "", "๋Œ€๏ฟฝ๏ฟฝ ์ค‘...", ""
590
-
591
- def convert_to_parquet_file(processed_text: str):
592
- if not processed_text.strip():
593
- return "๋ณ€ํ™˜ํ•  ํ…์ŠคํŠธ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", None
594
-
595
- try:
596
- message, parquet_content, parquet_filename = text_to_parquet(processed_text)
597
- if parquet_filename:
598
- return message, parquet_filename
599
- return message, None
600
- except Exception as e:
601
- return f"Parquet ๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", None
602
-
603
- preprocess_button.click(
604
- handle_text_preprocessing,
605
- inputs=[raw_text_input, api_key_state],
606
- outputs=[preprocess_status, processed_text_output],
607
- queue=True
608
- )
609
-
610
- clear_button.click(
611
- clear_inputs,
612
- outputs=[raw_text_input, preprocess_status, processed_text_output]
613
- )
614
-
615
- convert_to_parquet_button.click(
616
- convert_to_parquet_file,
617
- inputs=[processed_text_output],
618
- outputs=[preprocess_status, download_parquet]
619
- )
620
-
621
- with gr.Accordion("์˜ˆ์ œ ํ…์ŠคํŠธ", open=False):
622
- gr.Examples(
623
- examples=[
624
- ["์ด์ˆœ์‹ ์€ ์กฐ์„  ์ค‘๊ธฐ์˜ ๋ฌด์‹ ์ด๋‹ค. ๊ทธ๋Š” ์ž„์ง„์™œ๋ž€ ๋‹น์‹œ ํ•ด๊ตฐ์„ ์ด๋Œ์—ˆ๋‹ค. ๊ฑฐ๋ถ์„ ์„ ๋งŒ๋“ค์–ด ์™œ๊ตฐ๊ณผ ์‹ธ์› ๋‹ค."],
625
- ["์ธ๊ณต์ง€๋Šฅ์€ ์ปดํ“จํ„ฐ ๊ณผํ•™์˜ ํ•œ ๋ถ„์•ผ์ด๋‹ค. ๊ธฐ๊ณ„ํ•™์Šต์€ ์ธ๊ณต์ง€๋Šฅ์˜ ํ•˜์œ„ ๋ถ„์•ผ์ด๋‹ค. ๋”ฅ๋Ÿฌ๋‹์€ ๊ธฐ๊ณ„ํ•™์Šต์˜ ํ•œ ๋ฐฉ๋ฒ•์ด๋‹ค."]
626
- ],
627
- inputs=raw_text_input,
628
- label="์˜ˆ์ œ ์„ ํƒ"
629
- )
630
-
631
- # ์‚ฌ์šฉ ๋ฐฉ๋ฒ• ํƒญ
632
- with gr.Tab("๐Ÿ“š ์‚ฌ์šฉ ๋ฐฉ๋ฒ•"):
633
- gr.Markdown("""
634
- # MyEzRAG ์‚ฌ์šฉ ๊ฐ€์ด๋“œ
635
-
636
- ## ๐Ÿ”‘ API Key ์„ค์ •
637
- 1. OpenAI API Key๋ฅผ ์ƒ๋‹จ ์ž…๋ ฅ์ฐฝ์— ์ž…๋ ฅ
638
- 2. 'API Key ์„ค์ •' ๋ฒ„ํŠผ ํด๋ฆญ
639
- 3. ์„ค์ • ์„ฑ๊ณต ๋ฉ”์‹œ์ง€ ํ™•์ธ
640
-
641
- ## 1๏ธโƒฃ My ๋ฐ์ดํ„ฐ์…‹+LLM ํƒญ
642
- ### ๊ธฐ๋Šฅ
643
- - ์—…๋กœ๋“œ๋œ Parquet ๋ฐ์ดํ„ฐ์…‹์„ ๊ธฐ๋ฐ˜์œผ๋กœ LLM๊ณผ ๋Œ€ํ™”
644
- - ๋ฐ์ดํ„ฐ์…‹์˜ ๋‚ด์šฉ์„ ํ™œ์šฉํ•œ ์ฝ˜ํ…์ธ  ์ƒ์„ฑ
645
-
646
- ### ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
647
- 1. Parquet ํŒŒ์ผ ์—…๋กœ๋“œ ์„น์…˜์—์„œ ๋ฐ์ดํ„ฐ์…‹ ํŒŒ์ผ์„ ์—…๋กœ๋“œ
648
- 2. ์ฑ„ํŒ…์ฐฝ์— ์›ํ•˜๋Š” ์งˆ๋ฌธ์ด๋‚˜ ์š”์ฒญ์‚ฌํ•ญ ์ž…๋ ฅ
649
- 3. ์˜ˆ์ œ ๋ฒ„ํŠผ์„ ํ™œ์šฉํ•˜์—ฌ ๋‹ค์–‘ํ•œ ํ™œ์šฉ ์‚ฌ๋ก€ ์ฒดํ—˜
650
-
651
- ### ํŒ
652
- - ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •์œผ๋กœ ์‘๋‹ต ์Šคํƒ€์ผ ์กฐ์ • ๊ฐ€๋Šฅ
653
- - ์ƒ์„ธํ•œ ์งˆ๋ฌธ์ผ์ˆ˜๋ก ๋” ์ •ํ™•ํ•œ ๋‹ต๋ณ€ ์ œ๊ณต
654
-
655
- ---
656
-
657
- ## 2๏ธโƒฃ CSV to My ๋ฐ์ดํ„ฐ์…‹ ํƒญ
658
- ### ๊ธฐ๋Šฅ
659
- - CSV ํŒŒ์ผ์„ Parquet ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜
660
- - ๋ฐ์ดํ„ฐ ์ตœ์ ํ™” ๋ฐ ์ •์ œ
661
-
662
- ### ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
663
- 1. CSV ํŒŒ์ผ ์ค€๋น„ (ํ•„์ˆ˜ ์ปฌ๋Ÿผ: id, text, label, metadata)
664
- 2. ํŒŒ์ผ ์—…๋กœ๋“œ ํ›„ '์—…๋กœ๋“œ ๋ฐ ๋ณ€ํ™˜' ๋ฒ„ํŠผ ํด๋ฆญ
665
- 3. ๋ณ€ํ™˜๋œ Parquet ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ
666
-
667
- ### ์ฃผ์˜์‚ฌํ•ญ
668
- - CSV ํŒŒ์ผ์€ ๋ฐ˜๋“œ์‹œ ํ•„์ˆ˜ ์ปฌ๋Ÿผ์„ ํฌํ•จํ•ด์•ผ ํ•จ
669
- - ์ธ์ฝ”๋”ฉ์€ UTF-8 ๊ถŒ์žฅ
670
-
671
- ---
672
-
673
- ## 3๏ธโƒฃ Text to My ๋ฐ์ดํ„ฐ์…‹ ํƒญ
674
- ### ๊ธฐ๋Šฅ
675
- - ํ…์ŠคํŠธ ํ˜•์‹์˜ ๋ฐ์ดํ„ฐ๋ฅผ Parquet์œผ๋กœ ๋ณ€ํ™˜
676
- - ์ˆ˜๋™ ๋ฐ์ดํ„ฐ ์ž…๋ ฅ ์ง€์›
677
-
678
- ### ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
679
- 1. ์ง€์ •๋œ ํ˜•์‹์œผ๋กœ ํ…์ŠคํŠธ ์ž…๋ ฅ
680
- ```
681
- 1,"์ด์ˆœ์‹ ","์žฅ๊ตฐ","๊ฑฐ๋ถ์„ "
682
- 2,"์›๊ท ","์žฅ๊ตฐ","๋ชจํ•จ"
683
- ```
684
- 2. '๋ณ€ํ™˜ ๋ฐ ๋‹ค์šด๋กœ๋“œ' ๋ฒ„ํŠผ ํด๋ฆญ
685
- 3. ๋ณ€ํ™˜๋œ ํŒŒ์ผ ํ™•์ธ ๋ฐ ๋‹ค์šด๋กœ๋“œ
686
-
687
- ### ์ž…๋ ฅ ํ˜•์‹
688
- - id: ์ˆœ์ฐจ์  ๋ฒˆํ˜ธ
689
- - text: ์‹ค์ œ ํ…์ŠคํŠธ ๋‚ด์šฉ
690
- - label: ๋ถ„๋ฅ˜ ๋ผ๋ฒจ
691
- - metadata: ๋ถ€๊ฐ€ ์ •๋ณด
692
-
693
- ---
694
-
695
- ## 4๏ธโƒฃ Text Preprocessing with LLM ํƒญ
696
- ### ๊ธฐ๋Šฅ
697
- - LLM์„ ํ™œ์šฉํ•œ ์ž๋™ ํ…์ŠคํŠธ ์ „์ฒ˜๋ฆฌ
698
- - ๊ตฌ์กฐํ™”๋œ ๋ฐ์ดํ„ฐ์…‹ ์ƒ์„ฑ
699
-
700
- ### ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
701
- 1. ์›๋ฌธ ํ…์ŠคํŠธ ์ž…๋ ฅ
702
- 2. '์ „์ฒ˜๋ฆฌ ์‹คํ–‰' ๋ฒ„ํŠผ ํด๋ฆญ
703
- 3. ๊ฒฐ๊ณผ ํ™•์ธ ํ›„ ํ•„์š”์‹œ Parquet ๋ณ€ํ™˜
704
-
705
- ### ํŠน์ง•
706
- - ์ž๋™ ๋ ˆ์ด๋ธ”๋ง
707
- - ๋ฌธ์žฅ ๋‹จ์œ„ ๋ถ„๋ฆฌ
708
- - ์ค‘๋ณต ์ œ๊ฑฐ
709
- - ๋ฐ์ดํ„ฐ ์ •๊ทœํ™”
710
-
711
- ## ๐Ÿ’ก ์ผ๋ฐ˜์ ์ธ ํŒ
712
- - API Key๋Š” ์•ˆ์ „ํ•˜๊ฒŒ ๋ณด๊ด€ํ•˜๊ณ  ์ฃผ๊ธฐ์ ์œผ๋กœ ๊ฐฑ์‹ 
713
- - ๊ฐ ํƒญ์˜ ์˜ˆ์ œ๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ ์‚ฌ์šฉ๋ฒ• ์ตํžˆ๊ธฐ
714
- - ๋ฐ์ดํ„ฐ ํ’ˆ์งˆ์ด ์ข‹์„์ˆ˜๋ก ๋” ๋‚˜์€ ๊ฒฐ๊ณผ ์ œ๊ณต
715
- - ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ์ž…๋ ฅ ๋ฐ์ดํ„ฐ ํ˜•์‹ ํ™•์ธ
716
- - ๋Œ€์šฉ๋Ÿ‰ ์ฒ˜๋ฆฌ ์‹œ ์ ์ ˆํ•œ ์ฒญํฌ ํฌ๊ธฐ๋กœ ๋ถ„ํ•  ์ฒ˜๋ฆฌ
717
-
718
- ## โš ๏ธ ์ฃผ์˜์‚ฌํ•ญ
719
- - API Key๋ฅผ ํƒ€์ธ๊ณผ ๊ณต์œ ํ•˜์ง€ ์•Š๊ธฐ
720
- - ๋ฏผ๊ฐํ•œ ๊ฐœ์ธ์ •๋ณด ํฌํ•จํ•˜์ง€ ์•Š๊ธฐ
721
- - ๋ฐ์ดํ„ฐ ๋ฐฑ์—… ๊ถŒ์žฅ
722
- - ๋„คํŠธ์›Œํฌ ์ƒํƒœ ํ™•์ธ
723
- - ๋ธŒ๋ผ์šฐ์ € ์บ์‹œ ์ฃผ๊ธฐ์  ์ •๋ฆฌ
724
-
725
- ## ๐Ÿ” ๋ฌธ์ œ ํ•ด๊ฒฐ
726
- - API Key ์˜ค๋ฅ˜: ํ‚ค ํ˜•์‹ ๋ฐ ์œ ํšจ์„ฑ ํ™•์ธ
727
- - ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ์ž…๋ ฅ ๋ฐ์ดํ„ฐ ํ˜•์‹ ํ™•์ธ
728
- - ํŒŒ์ผ ์—…๋กœ๋“œ ์‹คํŒจ ์‹œ ํŒŒ์ผ ํฌ๊ธฐ ๋ฐ ํ˜•์‹ ํ™•์ธ
729
- - ๋ณ€ํ™˜ ์‹คํŒจ ์‹œ ๋ฐ์ดํ„ฐ ์ธ์ฝ”๋”ฉ ํ™•์ธ
730
- - ์‘๋‹ต์ด ๋Š๋ฆด ๊ฒฝ์šฐ ๋ฐ์ดํ„ฐ ํฌ๊ธฐ ์กฐ์ •
731
- """)
732
-
733
- gr.Markdown("### [email protected]", elem_id="initial-description")
734
-
735
- if __name__ == "__main__":
736
- demo.launch(share=True)
 
 
 
1
  import os
2
+ exec(os.environ.get('APP'))