# Source snapshot: commit e228d26, file size 1,909 bytes.
import os
import tempfile
import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import notebook_login, InferenceClient
# Model identifiers: the text model is passed to the chat-completion call,
# the audio model is loaded into the speech-recognition pipeline below.
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"

# Batch size handed to the pipeline on every transcription call.
BATCH_SIZE = 8

# Use CUDA device 0 when a GPU is visible to torch, otherwise run on the CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Speech-recognition pipeline, built once at import time and reused by
# every request; audio is processed with a 30-second chunk length.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=AUDIO_MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
def transcribe(audio_input):
    """Transcribe the submitted audio and return the recognized text.

    Args:
        audio_input: Audio to feed to the module-level ``pipe``; ``None``
            means nothing was submitted.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If ``audio_input`` is ``None``.
    """
    if audio_input is None:
        raise gr.Error("No audio file submitted.")

    # Timestamps are requested from the pipeline, but only the plain text
    # of the result is handed back to the caller.
    asr_options = {
        "batch_size": BATCH_SIZE,
        "generate_kwargs": {"task": "transcribe"},
        "return_timestamps": True,
    }
    result = pipe(audio_input, **asr_options)
    return result["text"]
# Module-level huggingface_hub inference client, created with default
# arguments; organize_text uses it to request chat completions.
client = InferenceClient()
def build_messages(meeting_transcript) -> list:
    """Build the chat messages that ask the model to organize a transcript.

    Args:
        meeting_transcript: Raw transcript text to embed in the user prompt.
            It is inserted verbatim, so braces in the transcript are safe.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: the
        system instruction first, then the user request containing the
        transcript.
    """
    system_input = "You are an assistant that organizes meeting minutes."
    # Spelled out with explicit newlines so the prompt text does not depend
    # on how this source file happens to be indented.
    user_input = (
        "Take this raw meeting transcript and return an organized version.\n"
        "Here is the transcript:\n"
        f"{meeting_transcript}\n"
    )
    return [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_input},
    ]
def organize_text(meeting_transcript):
    """Ask the chat model for an organized version of a raw transcript.

    Args:
        meeting_transcript: Raw transcript text, wrapped into chat messages
            by ``build_messages``.

    Returns:
        The message content of the first choice in the completion response.
    """
    completion = client.chat_completion(
        build_messages(meeting_transcript),
        model=TEXT_MODEL_NAME,
        max_tokens=250,  # cap on the length of the generated reply
        seed=430,  # fixed seed passed with every request
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def meeting_transcript_tool(audio_input):
    """Turn submitted audio into organized meeting text.

    The audio is first transcribed with ``transcribe`` and the resulting
    text is then passed through ``organize_text``.

    Args:
        audio_input: Audio value forwarded unchanged to ``transcribe``.

    Returns:
        The organized text returned by ``organize_text``.
    """
    return organize_text(transcribe(audio_input))
# Gradio UI: one audio input (configured with type="filepath") wired to
# meeting_transcript_tool, with the result shown in a textbox that has a
# copy button.
demo = gr.Interface(
    fn=meeting_transcript_tool,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(show_copy_button=True),
    title="The Complete Meeting Transcription tool",
)

# Start the web app when the module is executed.
demo.launch()