Spaces: Runtime error
DrBardiaGh committed: Update app.py
app.py
CHANGED
@@ -1,64 +1,109 @@
 import gradio as gr
-
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-
-
-    ):
-        token = message.choices[0].delta.content
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-
-if __name__ == "__main__":
-    demo.launch()
+import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
+import openai
+import os
+
+# --------------- STEP 1: Load the free STT model from Hugging Face ---------------
+# We use a Persian Wav2Vec2 model
+# Example: 'm3hrdadfi/wav2vec2-large-xlsr-persian-v2'
+stt_model_name = "m3hrdadfi/wav2vec2-large-xlsr-persian-v2"
+tokenizer = Wav2Vec2Tokenizer.from_pretrained(stt_model_name)
+stt_model = Wav2Vec2ForCTC.from_pretrained(stt_model_name)
+
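A side note on the loading step above: newer transformers releases steer audio input toward Wav2Vec2Processor rather than Wav2Vec2Tokenizer, which is flagged as deprecated for this use. A minimal sketch of the same step with the processor, assuming the checkpoint above ships a processor config:

# Sketch, not part of the commit: load the same checkpoint through
# Wav2Vec2Processor (feature extractor + CTC tokenizer in one object).
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

stt_model_name = "m3hrdadfi/wav2vec2-large-xlsr-persian-v2"
processor = Wav2Vec2Processor.from_pretrained(stt_model_name)
stt_model = Wav2Vec2ForCTC.from_pretrained(stt_model_name)
stt_model.eval()  # inference only

Decoding then goes through processor.batch_decode(predicted_ids) or processor.decode(predicted_ids[0]), mirroring the tokenizer calls used below.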
+# --------------- STEP 2: Configure OpenAI API for GPT-4o mini (fine-tuned) ---------------
+# You need your OpenAI API key. Let's read it from an environment variable for safety.
+openai.api_key = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY_HERE")
+# If you'd rather hardcode it, replace "YOUR_API_KEY_HERE" with your actual key,
+# but do not commit your real key publicly.
+
+OPENAI_MODEL_NAME = "gpt-4o-mini"  # The exact model name you said you'd use (example)
+
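On Spaces the key is normally stored as a repository secret, which the app sees as an environment variable; the fallback placeholder above means a missing secret only surfaces later as a failed API call. A small sketch, assuming the secret is named OPENAI_API_KEY as in the os.getenv call, that fails fast at startup instead:

# Sketch, not part of the commit: require the key up front.
import os
import openai

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; add it as a Space secret.")
openai.api_key = api_key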
+# --------------- STT Function ---------------
+def speech_to_text(audio):
+    """
+    audio is a tuple (sample_rate, numpy_data) from Gradio's microphone or an audio file.
+    Convert Persian speech to text using the wav2vec2 model, fully offline.
+    """
+    if audio is None:
+        return ""
+    sample_rate, data = audio
+    # Convert to a torch tensor of input values
+    input_values = tokenizer(data, return_tensors="pt", sampling_rate=sample_rate).input_values
+    with torch.no_grad():
+        logits = stt_model(input_values).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = tokenizer.decode(predicted_ids[0])
+    # The transcription may be uppercase and missing punctuation; we just lowercase it and return it raw.
+    return transcription.lower()
+
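One caveat about speech_to_text above: with type="numpy", Gradio hands over int16 samples at whatever rate was recorded, while XLSR wav2vec2 checkpoints are trained on 16 kHz float32 mono. A hedged preprocessing sketch; prepare_audio is an illustrative helper, not part of the committed file, and torchaudio is just one way to resample:

# Sketch, not part of the commit: normalize Gradio audio for the model.
import numpy as np
import torch
import torchaudio

def prepare_audio(sample_rate: int, data: np.ndarray) -> torch.Tensor:
    waveform = torch.from_numpy(data.astype(np.float32))
    if waveform.abs().max() > 1.0:   # int16 samples -> [-1.0, 1.0]
        waveform = waveform / 32768.0
    if waveform.ndim > 1:            # stereo -> mono
        waveform = waveform.mean(dim=-1)
    if sample_rate != 16000:         # resample to the model's expected rate
        waveform = torchaudio.functional.resample(waveform, sample_rate, 16000)
    return waveform

The resulting waveform can then be passed to the tokenizer (or processor) with sampling_rate=16000 before running the model.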
+# --------------- Correction Function with GPT-4o mini ---------------
+def correct_text_with_gpt(text):
+    """
+    Send the text to OpenAI GPT-4o mini for correction/improvement.
+    The user wants a formal tone consistent with medical/legal usage.
+    The fine-tuned model is presumably specialized for that domain.
+    """
+    if not text.strip():
+        return ""
+
+    system_message = (
+        "You are a specialized model that receives Persian text and corrects it with a formal tone, "
+        "particularly for medical/legal context. Make the text coherent, unify spacing and punctuation, "
+        "and ensure a formal writing style. Do not add commentary. Just provide the corrected text."
+    )
+
+    # i.e. "Raw text: {text} -- Please rewrite the above text as one formal, administrative paragraph."
+    user_message = f"متن خام: {text}\n\nلطفاً متن فوق را در یک پاراگراف رسمی و اداری اصلاح کن."
+
+    # We'll call the ChatCompletion API if GPT-4o mini is chat-based.
+    # If it's a completion-based model, the code differs, but let's assume it's chat-based for simplicity.
+    response = openai.ChatCompletion.create(
+        model=OPENAI_MODEL_NAME,
+        messages=[
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": user_message}
+        ],
+        temperature=0.2,
+        max_tokens=1024
+    )
+    corrected = response.choices[0].message.content
+    return corrected.strip()
+
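A portability note on correct_text_with_gpt: openai.ChatCompletion.create is the pre-1.0 interface of the openai package, and on openai>=1.0 the call fails with an error saying that API was removed, which is one plausible source of a runtime error on a freshly built Space. A sketch of the same request with the 1.x client, assuming the same model name and parameters; the _v1 suffix only keeps it distinct from the committed function:

# Sketch, not part of the commit: the same request with the openai>=1.0 client.
from openai import OpenAI

client = OpenAI()  # picks up OPENAI_API_KEY from the environment

def correct_text_with_gpt_v1(text: str, system_message: str, user_message: str) -> str:
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ],
        temperature=0.2,
        max_tokens=1024,
    )
    return response.choices[0].message.content.strip()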
+# --------------- Gradio Interface ---------------
+# We want:
+# - A widget to record or upload audio
+# - A box that shows the transcribed text
+# - A button "اصلاح متن" ("Correct text") that calls GPT-4o mini
+# - A final output box
+
+def process_audio_and_correct(audio):
+    # Step A: convert speech to text
+    raw_text = speech_to_text(audio)
+    # Step B: correct with GPT
+    final_text = correct_text_with_gpt(raw_text)
+    return raw_text, final_text
+
+with gr.Blocks() as demo:
+    # Title: "Persian speech-to-text website + text correction with GPT-4o mini"
+    gr.Markdown("# وبسایت تبدیل گفتار به متن (فارسی) + اصلاح متن با GPT-4o mini")
+    # "Here you can record a Persian voice message or upload an audio file, then receive the extracted and corrected text."
+    gr.Markdown("در اینجا می‌توانید یک وویس فارسی ضبط یا فایل صوتی آپلود کنید. سپس متن استخراج‌شده و اصلاح‌شده را دریافت کنید.")
+
+    with gr.Row():
+        # Label: "Record audio or upload a file (free)"
+        audio_in = gr.Audio(source="microphone", type="numpy", label="ضبط صدا یا آپلود فایل (رایگان)")
+
+    # Label: "Raw text (stage-one output - free)"
+    transcribed_text = gr.Textbox(label="متن خام (خروجی مرحله اول - رایگان)", lines=3)
+    # Label: "Corrected text (with GPT-4o mini - paid)"
+    corrected_text = gr.Textbox(label="متن اصلاح‌شده (با GPT-4o mini - پولی)", lines=3)
+
+    # Button: "Convert and correct"
+    btn_process = gr.Button("تبدیل و اصلاح")
+
+    def audio_workflow(audio):
+        # step 1: stt
+        raw_text = speech_to_text(audio)
+        transcribed_text.update(raw_text)
+        # step 2: correction
+        corrected = correct_text_with_gpt(raw_text)
+        corrected_text.update(corrected)
+        return raw_text, corrected
+
+    btn_process.click(fn=process_audio_and_correct, inputs=audio_in, outputs=[transcribed_text, corrected_text])
+
+demo.launch()
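Finally, the Space's status banner reports a runtime error, and the most suspicious line in this interface is gr.Audio(source="microphone", ...): Gradio 4.x renamed that keyword to sources (a list), so on a 4.x build the old argument is at best ignored and at worst rejected. The nested audio_workflow helper is also never attached to any event, and calling .update() on components like that does not refresh the UI (handlers update outputs through their return values), so it can simply be dropped; btn_process.click already fills both boxes via process_audio_and_correct. A sketch of the widget and wiring under a Gradio 4.x assumption, reusing process_audio_and_correct from the code above; the English labels are translations of the Persian ones:

# Sketch, not part of the commit: Gradio 4.x style audio input and wiring.
import gradio as gr

with gr.Blocks() as demo_v4:
    audio_in = gr.Audio(sources=["microphone", "upload"], type="numpy",
                        label="Record audio or upload a file (free)")
    transcribed_text = gr.Textbox(label="Raw text (stage one, free)", lines=3)
    corrected_text = gr.Textbox(label="Corrected text (GPT-4o mini, paid)", lines=3)
    btn_process = gr.Button("Convert and correct")

    # process_audio_and_correct is the function defined in the diff above
    btn_process.click(fn=process_audio_and_correct,
                      inputs=audio_in,
                      outputs=[transcribed_text, corrected_text])

demo_v4.launch()

Alternatively, pinning an older gradio (3.x) in requirements.txt should keep the original source= keyword working.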