Spaces:
Sleeping
Sleeping
File size: 2,414 Bytes
eb91ddc 10cfa3b 5a5050b 10cfa3b 59ff216 eb91ddc ce2a837 eb91ddc 5a5050b eb91ddc 6440aaf eb91ddc 6440aaf eb91ddc 394213a eb91ddc 394213a eb91ddc 6440aaf eb91ddc 5a5050b eb91ddc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import os
import tempfile

import soundfile as sf
import streamlit as st
from transformers import pipeline
# Load models.
# Streamlit re-executes this script on every user interaction; without caching,
# all three pipelines would be rebuilt (and model weights reloaded) on each rerun.
# st.cache_resource builds them exactly once per server process.
@st.cache_resource
def _load_models():
    """Build and return the (ASR, summarization, question-generation) pipelines.

    All pipelines run on CPU (device=-1), matching the original configuration.
    """
    asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=-1)
    summ = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1)
    qgen = pipeline("text2text-generation", model="google/t5-efficient-tiny", device=-1)
    return asr, summ, qgen

transcriber, summarizer, question_generator = _load_models()
# Upload audio file
uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])
if uploaded_file is not None:
    # Persist the upload to disk so the ASR pipeline can read it by path.
    # Keep the original extension so downstream decoders detect the format
    # (mp3 vs wav) correctly.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(uploaded_file.getbuffer())
        temp_audio_path = temp_audio_file.name
    try:
        # Sanity-check that the file is decodable audio before running the models;
        # sf.read raises on corrupt/unsupported input and the except below reports it.
        audio_data, sample_rate = sf.read(temp_audio_path)
        # Transcribing audio
        lecture_text = transcriber(temp_audio_path)["text"]
        # Preprocessing: DistilBART accepts at most 1024 input tokens. Estimate
        # tokens from words (~0.75 words per token heuristic) and truncate the
        # transcript when it would overflow. NOTE: the original code computed
        # min(num_words, 1024) * 0.75 (<= 768) and then tested "> 1024" — a dead
        # branch, so truncation never happened; fixed here.
        words = lecture_text.split()
        est_tokens = int(len(words) / 0.75)
        if est_tokens > 1024:
            lecture_text = " ".join(words[: int(1024 * 0.75)])
            est_tokens = 1024
        # Summarization. The summary's generation budget must be bounded by the
        # input size — the original passed max_length=1024 (an *output* length)
        # unconditionally, asking the model to generate more than it read for
        # short transcripts. Target roughly half the input, clamped to [32, 256].
        summary_max = max(32, min(256, est_tokens // 2))
        summary = summarizer(
            lecture_text,
            max_length=summary_max,
            min_length=max(8, summary_max // 4),
            truncation=True
        )
        # Clean up the summary text: drop a trailing partial sentence, if any.
        summary_text = summary[0]["summary_text"]
        if not summary_text.endswith((".", "!", "?")):
            last_period_index = summary_text.rfind(".")
            if last_period_index != -1:
                summary_text = summary_text[:last_period_index + 1]
        # Questions Generation
        context = f"Based on the following lecture summary: {summary_text}, generate some relevant practice questions."
        questions = question_generator(context, max_new_tokens=50)
        # Output
        st.write("\nSummary:\n", summary_text)
        for question in questions:
            st.write(question["generated_text"])  # Output the generated questions
    except Exception as e:
        st.error(f"Error during processing: {str(e)}")
    finally:
        # delete=False above means we own cleanup; the original leaked one temp
        # file per upload.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
|