File size: 2,170 Bytes
2612e90
0adbfae
aa6143f
 
 
0adbfae
 
 
 
7e6ae8c
d7ce9f8
f8eed92
2612e90
ec2db99
aa6143f
 
 
 
 
 
 
0adbfae
 
 
7f10f20
d7ce9f8
9ba013b
d7ce9f8
 
aa6143f
 
 
 
ec2db99
aa6143f
 
ec2db99
aa6143f
 
 
 
 
 
 
2612e90
7e6ae8c
2612e90
ec2db99
7f10f20
0d7c116
c2e4987
 
0d7c116
ec2db99
 
2612e90
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
from faster_whisper import WhisperModel
from pydub import AudioSegment
import os
import tempfile
from transformers import pipeline

# Model used for transcription.
_WHISPER_MODEL_ID = "ivrit-ai/faster-whisper-v2-d4"
model = WhisperModel(_WHISPER_MODEL_ID)

# Summarization pipeline backed by a model adapted to Hebrew.
# NOTE(review): this checkpoint's name suggests a Mistral causal LM; confirm
# that it actually loads under the "summarization" task and returns
# "summary_text" entries, as transcribe_and_summarize expects.
_SUMMARIZER_MODEL_ID = "yam-peleg/Hebrew-Mistral-7B-200K"
summarizer = pipeline("summarization", model=_SUMMARIZER_MODEL_ID)

def transcribe_and_summarize(file_path):
    """Transcribe an audio/video file in Hebrew and summarize the transcript.

    Video files (recognised by extension, case-insensitively) are first
    converted to a temporary WAV file, which is always removed afterwards,
    even when transcription or summarization fails.

    Args:
        file_path: Path of the uploaded file, or None/"" when nothing was
            uploaded.

    Returns:
        A ``(transcript, summary)`` tuple of strings. On any failure the first
        element is a Hebrew error message and the second is an empty string;
        the function never raises, so the UI always receives two values.
    """
    if not file_path:
        # "Error processing the file: no file was uploaded"
        return "שגיאה בעיבוד הקובץ: לא הועלה קובץ", ""

    audio_file = file_path
    try:
        # Convert video containers to audio first; everything else is
        # handed to the transcription model as-is.
        if file_path.lower().endswith((".mp4", ".mov", ".avi", ".mkv")):
            audio_file = convert_video_to_audio(file_path)

        # Transcribe the audio. Each segment is stripped so that joining
        # with a single space does not produce doubled spaces.
        segments, _ = model.transcribe(audio_file, language="he")
        transcript = " ".join(segment.text.strip() for segment in segments)

        # Nothing was recognised: there is nothing to summarize.
        if not transcript.strip():
            return transcript, ""

        # Summarize the transcript.
        summary = summarizer(transcript)[0]["summary_text"]
        return transcript, summary

    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing.
        # "Error processing the file: <details>"
        return f"שגיאה בעיבוד הקובץ: {e}", ""

    finally:
        # Remove the temporary audio file (only exists for video input).
        if audio_file != file_path:
            try:
                os.remove(audio_file)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask
                # the real result or error.
                pass

def convert_video_to_audio(video_file):
    """Export the audio of ``video_file`` to a new temporary WAV file.

    Args:
        video_file: Path of the media file to read.

    Returns:
        Path of the newly created ``.wav`` file. This function does not
        delete it; the caller is responsible for removing it.

    Raises:
        Exception: Whatever ``AudioSegment.from_file`` / ``export`` raise; the
            temporary file is removed before the error propagates.
    """
    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before we write to it.
    fd, temp_audio = tempfile.mkstemp(suffix=".wav")
    # Only the path is needed: the export below reopens the file by name.
    os.close(fd)
    try:
        audio = AudioSegment.from_file(video_file)
        audio.export(temp_audio, format="wav")
    except Exception:
        # Do not leave an empty/partial temp file behind on failure.
        os.remove(temp_audio)
        raise
    return temp_audio

# Gradio interface definition.
# NOTE(review): the input is an audio component while the handler also
# supports video extensions; confirm that video uploads are actually
# accepted by this component in the Gradio version in use.
interface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="תמלול"),  # "Transcript"
        gr.Textbox(label="סיכום"),  # "Summary"
    ],
    # "Audio/video to transcript and summary converter"
    title="ממיר אודיו/וידאו לתמלול וסיכום",
    # "Upload an audio or video file of a lecturer and get a full transcript
    # and a short summary of the content."
    description="העלה קובץ אודיו או וידאו של מרצה וקבל תמלול מלא וסיכום קצר של התוכן.",
)

if __name__ == "__main__":
    interface.launch()