File size: 2,087 Bytes
2612e90
0adbfae
aa6143f
 
 
0adbfae
 
 
 
7e6ae8c
d7ce9f8
f8eed92
2612e90
ec2db99
aa6143f
 
 
 
 
 
 
0adbfae
 
 
7f10f20
92c34bf
 
d7ce9f8
aa6143f
 
 
 
ec2db99
aa6143f
 
ec2db99
aa6143f
 
 
 
 
 
 
2612e90
7e6ae8c
2612e90
ec2db99
7f10f20
0d7c116
c2e4987
 
0d7c116
ec2db99
 
2612e90
 
 
92c34bf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gradio as gr
from faster_whisper import WhisperModel
from pydub import AudioSegment
import os
import tempfile
from transformers import pipeline

# Speech-to-text model used for transcription.
model = WhisperModel(model_size_or_path="ivrit-ai/faster-whisper-v2-d4")

# Summarization pipeline backed by a Hebrew-oriented model.
# NOTE(review): the checkpoint name suggests a Mistral-family model; confirm
# that it actually loads under the "summarization" task.
summarizer = pipeline(
    task="summarization",
    model="yam-peleg/Hebrew-Mistral-7B-200K",
)

def transcribe_and_summarize(file_path):
    """Transcribe an audio/video file and summarize the transcript.

    Video inputs (recognized by file extension, case-insensitively) are first
    converted to a temporary WAV file via ``convert_video_to_audio``; that
    temporary file is always deleted before returning, whether or not
    processing succeeded.

    Args:
        file_path: Path of the uploaded audio or video file.

    Returns:
        A ``(transcript, summary)`` tuple of strings. If the transcript is
        blank, the summary is ``""`` and the summarizer is not invoked. On any
        failure the tuple is ``(error_message, "")`` instead of raising, so
        the UI can display the message.
    """
    video_extensions = (".mp4", ".mov", ".avi", ".mkv")
    temp_audio = None  # set only when a temporary WAV was created

    try:
        if not file_path:
            # Surface a clear message instead of an AttributeError on None.
            raise ValueError("no input file was provided")

        # Convert video to audio when needed; compare in lowercase so that
        # names such as "CLIP.MP4" are recognized as well.
        if file_path.lower().endswith(video_extensions):
            temp_audio = convert_video_to_audio(file_path)
            audio_file = temp_audio
        else:
            audio_file = file_path

        # Transcribe the audio.
        segments, _ = model.transcribe(audio_file, language="he")
        transcript = " ".join(segment.text for segment in segments)

        # Nothing to summarize: skip the summarizer for a blank transcript.
        if not transcript.strip():
            return transcript, ""

        # Summarize the transcript, capping the generated length.
        summary = summarizer(transcript, max_new_tokens=100)[0]["summary_text"]

        return transcript, summary

    except Exception as e:
        return f"ืฉื’ื™ืื” ื‘ืขื™ื‘ื•ื“ ื”ืงื•ื‘ืฅ: {str(e)}", ""

    finally:
        # Remove the temporary audio file on every path (success or failure).
        if temp_audio is not None:
            try:
                os.remove(temp_audio)
            except OSError:
                # Best-effort cleanup: a failed delete must not discard an
                # already computed result.
                pass

def convert_video_to_audio(video_file):
    """Export the audio of ``video_file`` to a new temporary WAV file.

    Args:
        video_file: Path of the media file to read with
            ``AudioSegment.from_file``.

    Returns:
        Path of the newly created ``.wav`` file. The caller is responsible
        for deleting it.

    Raises:
        Exception: Whatever loading or exporting raises; the temporary file
            is removed before the error propagates.
    """
    # mkstemp creates the file atomically, unlike the deprecated, race-prone
    # mktemp. Only the path is needed, so the descriptor is closed right away.
    fd, temp_audio = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        video = AudioSegment.from_file(video_file)
        video.export(temp_audio, format="wav")
    except Exception:
        # Do not leave an orphaned temp file behind when conversion fails.
        try:
            os.remove(temp_audio)
        except OSError:
            pass
        raise

    return temp_audio

# Gradio interface definition: one file-path audio input, two text outputs
# (transcript and summary).
_audio_input = gr.Audio(type="filepath")
_transcript_box = gr.Textbox(label="ืชืžืœื•ืœ")
_summary_box = gr.Textbox(label="ืกื™ื›ื•ื")

interface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=_audio_input,
    outputs=[_transcript_box, _summary_box],
    title="ืžืžื™ืจ ืื•ื“ื™ื•/ื•ื™ื“ืื• ืœืชืžืœื•ืœ ื•ืกื™ื›ื•ื",
    description="ื”ืขืœื” ืงื•ื‘ืฅ ืื•ื“ื™ื• ืื• ื•ื™ื“ืื• ืฉืœ ืžืจืฆื” ื•ืงื‘ืœ ืชืžืœื•ืœ ืžืœื ื•ืกื™ื›ื•ื ืงืฆืจ ืฉืœ ื”ืชื•ื›ืŸ.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()