File size: 2,414 Bytes
eb91ddc
10cfa3b
5a5050b
10cfa3b
59ff216
eb91ddc
 
 
 
ce2a837
eb91ddc
 
5a5050b
eb91ddc
 
 
 
 
6440aaf
eb91ddc
 
 
 
6440aaf
eb91ddc
 
394213a
eb91ddc
 
 
 
394213a
eb91ddc
 
6440aaf
eb91ddc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a5050b
eb91ddc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import tempfile

import soundfile as sf
import streamlit as st
from transformers import pipeline

# --- Model loading -----------------------------------------------------------
@st.cache_resource
def _load_models():
    """Build the three Hugging Face pipelines once per server process.

    Streamlit re-executes this script on every interaction; caching avoids
    re-downloading/re-instantiating the models on each rerun.

    Returns:
        tuple: (transcriber, summarizer, question_generator), all on CPU
        (device=-1).
    """
    asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=-1)
    summ = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1)
    qgen = pipeline("text2text-generation", model="google/t5-efficient-tiny", device=-1)
    return asr, summ, qgen


transcriber, summarizer, question_generator = _load_models()

# DistilBART's encoder accepts at most 1024 input tokens; we estimate
# ~0.75 tokens per word (same heuristic the original code used).
_MAX_INPUT_TOKENS = 1024
_TOKENS_PER_WORD = 0.75

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])

if uploaded_file is not None:
    # The ASR pipeline wants a file path, so persist the upload to disk.
    # Keep the upload's extension so audio decoders see the right format.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(uploaded_file.getbuffer())
        temp_audio_path = temp_audio_file.name

    try:
        # Fail fast on undecodable audio before spending time on transcription.
        # (Return values are intentionally discarded — this is a validity check.)
        sf.read(temp_audio_path)

        # Transcribing audio
        lecture_text = transcriber(temp_audio_path)["text"]

        # Truncate the transcript to fit the summarizer's 1024-token input
        # window. NOTE: the original guard (`if max_length > 1024`) could
        # never fire because max_length was already capped at 768 — this is
        # the intended truncation, done on whole words.
        words = lecture_text.split()
        max_input_words = int(_MAX_INPUT_TOKENS / _TOKENS_PER_WORD)
        if len(words) > max_input_words:
            words = words[:max_input_words]
            lecture_text = " ".join(words)

        # Estimated token length of the (possibly truncated) transcript.
        est_tokens = min(int(len(words) * _TOKENS_PER_WORD), _MAX_INPUT_TOKENS)

        # Summarization: cap the summary below the input length instead of
        # requesting up to 1024 output tokens (which exceeds most inputs and
        # makes the model emit a warning); keep the original 10% floor.
        summary = summarizer(
            lecture_text,
            max_length=max(min(est_tokens, 256), 32),
            min_length=max(int(est_tokens * 0.1), 5),
            truncation=True,
        )

        # Drop a trailing partial sentence so the summary ends cleanly.
        summary_text = summary[0]["summary_text"]
        if not summary_text.endswith((".", "!", "?")):
            last_period_index = summary_text.rfind(".")
            if last_period_index != -1:
                summary_text = summary_text[:last_period_index + 1]

        # Questions Generation
        context = f"Based on the following lecture summary: {summary_text}, generate some relevant practice questions."
        questions = question_generator(context, max_new_tokens=50)

        # Output
        st.write("\nSummary:\n", summary_text)
        for question in questions:
            st.write(question["generated_text"])  # Output the generated questions

    except Exception as e:
        # Top-level UI boundary: surface any processing failure to the user.
        st.error(f"Error during processing: {str(e)}")
    finally:
        # delete=False above means this script owns cleanup of the temp file;
        # the original leaked one file per upload.
        try:
            os.unlink(temp_audio_path)
        except OSError:
            pass  # best-effort cleanup; nothing useful to show the user