import torch
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
# Model checkpoints and inference settings.
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"
BATCH_SIZE = 8

# Use the first GPU if one is available, otherwise run on CPU.
device = 0 if torch.cuda.is_available() else "cpu"
# Speech-to-text pipeline; 30-second chunking lets it handle long recordings.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=AUDIO_MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
def transcribe(audio_input):
    """Convert an audio file to text with the speech-recognition pipeline."""
    if audio_input is None:
        raise gr.Error("No audio file submitted.")
    output = pipe(
        audio_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
    )
    return output["text"]
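
# Hypothetical local check (the file path below is an assumption, not part of
# the app): transcribe("sample_meeting.wav") should return the full raw
# transcript as a single string.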
# Serverless Inference API client; it picks up a Hugging Face token from the
# environment when one is configured.
client = InferenceClient()
def build_messages(meeting_transcript) -> list:
    """Build the chat messages that ask the LLM to organize the transcript."""
    system_input = "You are an assistant that organizes meeting minutes."
    user_input = """Take this raw meeting transcript and return an organized version.
Here is the transcript:
{meeting_transcript}
""".format(meeting_transcript=meeting_transcript)
    messages = [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_input},
    ]
    return messages
def organize_text(meeting_transcript):
    """Ask the text model to structure the raw transcript into minutes."""
    messages = build_messages(meeting_transcript)
    # A fixed seed keeps the output reproducible for identical transcripts.
    response = client.chat_completion(
        messages, model=TEXT_MODEL_NAME, max_tokens=250, seed=430
    )
    return response.choices[0].message.content
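
# Hypothetical usage (the transcript text is an assumption, for illustration):
#   organize_text("John: budget draft is due Friday. Mary: I'll send it today.")
# returns the model's organized version of those minutes.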
def meeting_transcript_tool(audio_input):
    """End-to-end tool: transcribe the audio, then organize the text."""
    meeting_text = transcribe(audio_input)
    organized_text = organize_text(meeting_text)
    return organized_text
demo = gr.Interface(
    fn=meeting_transcript_tool,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(show_copy_button=True),
    title="The Complete Meeting Transcription Tool",
)

if __name__ == "__main__":
    demo.launch()
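
# To expose a temporary public link (assumption: you want to share the demo
# outside the local network), Gradio also supports demo.launch(share=True).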