import gradio as gr
from faster_whisper import WhisperModel
from pydub import AudioSegment
import os
import tempfile
from transformers import pipeline

# Set up the transcription model
model = WhisperModel("ivrit-ai/faster-whisper-v2-d4")

# Set up the summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
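# Note: facebook/bart-large-cnn is an English summarization model (CNN/DailyMail),
# so summary quality on Hebrew transcripts may be limited.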

def transcribe_and_summarize(file_path):
    try:
        # If the file is a video, convert it to audio first
        if file_path.endswith((".mp4", ".mov", ".avi", ".mkv")):
            audio_file = convert_video_to_audio(file_path)
        else:
            audio_file = file_path

        # Transcribe the audio
        segments, _ = model.transcribe(audio_file, language="he")
        transcript = " ".join([segment.text for segment in segments])

        # Summarize the transcript
        summary = summarizer(transcript, max_length=50, min_length=25, do_sample=False)[0]["summary_text"]
        # Delete the temporary audio file if one was created (video input)
        if audio_file != file_path:
            os.remove(audio_file)

        return transcript, summary

    except Exception as e:
        return f"ืฉื’ื™ืื” ื‘ืขื™ื‘ื•ื“ ื”ืงื•ื‘ืฅ: {str(e)}", ""

def convert_video_to_audio(video_file):
    # Create a temporary audio file and export the extracted audio track to it
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        temp_audio = tmp.name
    audio = AudioSegment.from_file(video_file)
    audio.export(temp_audio, format="wav")
    return temp_audio
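
# Optional sketch (not part of the original script): facebook/bart-large-cnn accepts
# roughly 1024 input tokens, so a full lecture transcript may exceed its limit. A
# hypothetical chunked variant could look like this; summarize_long_transcript and
# chunk_chars are assumed names/values, not taken from the original code.
def summarize_long_transcript(text, chunk_chars=3000):
    # Split the transcript into fixed-size character chunks (a rough token proxy)
    chunks = [text[i:i + chunk_chars] for i in range(0, len(text), chunk_chars)]
    # Summarize each chunk separately, then join the partial summaries
    partial_summaries = [
        summarizer(chunk, max_length=50, min_length=25, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    ]
    return " ".join(partial_summaries)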

# Set up the Gradio interface
interface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=gr.File(type="filepath"),  # gr.File accepts video uploads as well as audio
    outputs=[
        gr.Textbox(label="ืชืžืœื•ืœ"),
        gr.Textbox(label="ืกื™ื›ื•ื")
    ],
    title="ืžืžื™ืจ ืื•ื“ื™ื•/ื•ื™ื“ืื• ืœืชืžืœื•ืœ ื•ืกื™ื›ื•ื",
    description="ื”ืขืœื” ืงื•ื‘ืฅ ืื•ื“ื™ื• ืื• ื•ื™ื“ืื• ืฉืœ ืžืจืฆื” ื•ืงื‘ืœ ืชืžืœื•ืœ ืžืœื ื•ืกื™ื›ื•ื ืงืฆืจ ืฉืœ ื”ืชื•ื›ืŸ."
)

if __name__ == "__main__":
    interface.launch()
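
# Example usage (assumed, for local testing without the UI); "lecture.mp4" is a
# placeholder path, not a file shipped with this project:
#   transcript, summary = transcribe_and_summarize("lecture.mp4")
#   print(summary)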