|
import os |
|
import tempfile |
|
|
|
import torch |
|
import gradio as gr |
|
from transformers import pipeline |
|
|
|
from huggingface_hub import notebook_login, InferenceClient |
|
|
|
# Authenticate with the Hugging Face Hub (interactive token prompt in notebooks).
# NOTE(review): runs at import time; presumably required for the InferenceClient
# below and/or gated model access — confirm.
notebook_login()

# Chat model used to turn the raw transcript into organized meeting minutes.
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

# transformers pipelines accept an int GPU index (0 = first CUDA device)
# or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

# Speech-to-text model (distilled Whisper large-v3).
AUDIO_MODEL_NAME = (
    "distil-whisper/distil-large-v3"
)
# Batch size passed to the ASR pipeline per call (see transcribe()).
BATCH_SIZE = 8

# ASR pipeline; chunk_length_s=30 splits long recordings into 30-second
# windows so audio longer than the model's context can be transcribed.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=AUDIO_MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
|
|
|
def transcribe(audio_input):
    """Run the ASR pipeline on an audio file and return the transcript text.

    Raises gr.Error when no audio was provided by the UI.
    """
    # Guard clause: Gradio passes None when the user submits without a file.
    if audio_input is None:
        raise gr.Error("No audio file submitted.")

    result = pipe(
        audio_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
    )
    # The pipeline also returns per-chunk timestamps; only the text is used.
    return result["text"]
|
|
|
# Serverless Inference API client; no model is pinned here — the model is
# chosen per call (see organize_text()).
client = InferenceClient()
|
|
|
def build_messages(meeting_transcript) -> list:
    """Build the chat messages asking the model to organize a raw transcript.

    Args:
        meeting_transcript: Raw transcript text produced by transcribe().

    Returns:
        A two-element list of chat messages (system prompt + user prompt)
        in the format expected by InferenceClient.chat_completion.
    """
    # Fix: corrected the "assitant" typo in the system prompt.
    system_input = "You are an assistant that organizes meeting minutes."
    user_input = """Take this raw meeting transcript and return an organized version.

Here is the transcript:

{meeting_transcript}

""".format(
        meeting_transcript=meeting_transcript
    )

    messages = [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_input},
    ]
    return messages
|
|
|
def organize_text(meeting_transcript):
    """Ask the chat model to reorganize the transcript; return its reply text."""
    response = client.chat_completion(
        build_messages(meeting_transcript),
        model=TEXT_MODEL_NAME,
        max_tokens=250,
        seed=430,  # fixed seed for reproducible output
    )
    first_choice = response.choices[0]
    return first_choice.message.content
|
|
|
def meeting_transcript_tool(audio_input):
    """End-to-end pipeline: audio file -> transcript -> organized minutes."""
    return organize_text(transcribe(audio_input))
|
|
|
# Wire the end-to-end pipeline into a simple web UI.
audio_input_component = gr.Audio(type="filepath")
transcript_output_component = gr.Textbox(show_copy_button=True)

demo = gr.Interface(
    fn=meeting_transcript_tool,
    inputs=audio_input_component,
    outputs=transcript_output_component,
    title="The Complete Meeting Transcription tool",
)

demo.launch()