import os

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]


def transcript_audio(audio_file) -> str:
    """Transcribe an audio file with Whisper via the Hugging Face Inference API."""
    model = "openai/whisper-large-v3"
    api = InferenceClient(model, token=HUGGINGFACE_API_KEY)
    result = api.automatic_speech_recognition(audio_file)
    # Recent huggingface_hub versions return an AutomaticSpeechRecognitionOutput
    # object rather than a plain string; keep only the transcribed text.
    return result.text


def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
    """Summarize text into bullet points (and optionally a conclusion) with Llama 3."""
    llm_model = "meta-llama/Meta-Llama-3-70B-Instruct"
    api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
    tokenizer = AutoTokenizer.from_pretrained(llm_model, token=HUGGINGFACE_API_KEY)

    if conclusion:
        user_chat = (
            f"Summarize the following text into {bullet_points} bullet points "
            f"and a conclusion:\n{text}"
        )
    else:
        user_chat = f"Summarize the following text into {bullet_points} bullet points:\n{text}"

    chat = [
        {
            "role": "system",
            "content": (
                "You are a Meeting Summarizer AI. You will help summarize the text "
                "into bullet points and a conclusion. Please return Markdown-formatted "
                "text. Remember to give it a title."
            ),
        },
        {"role": "user", "content": user_chat},
    ]
    # Build the Llama 3 chat prompt from the message list, then generate the summary.
    prompt = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    summary = api.text_generation(prompt, max_new_tokens=250, do_sample=True)
    print(summary)
    return summary


def control(audio_file, text: str, bullet_points: int, conclusion: bool) -> str:
    """Transcribe the audio file if one is provided, then summarize the resulting text."""
    if audio_file:
        text = transcript_audio(audio_file)
    summary = summarize_text(text, bullet_points, conclusion)
    return summary


# A simple interface: the user can either enter text and get a summary, or upload
# an audio file and get a transcript and a summary.
iface = gr.Interface(
    fn=control,
    inputs=[
        gr.components.Audio(label="Audio file", type="filepath"),
        gr.components.Textbox(lines=5, label="Text"),
        gr.components.Slider(
            minimum=1, maximum=10, value=5, step=1, label="Number of bullet points"
        ),
        gr.components.Checkbox(label="Add conclusion"),
    ],
    outputs=gr.components.Markdown(label="Summary"),
)

iface.launch()