# Multimodal-Chat / app.py
# Hugging Face Space source file (3.22 kB).
# Commit c2cd1c7 (verified) by Falln87: "Create app.py"
import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from diffusers import StableDiffusionPipeline, DiffusionPipeline
from huggingface_hub import HfApi
# Shared Hugging Face Hub client, created once at import time with default
# arguments. The "Query HuggingFace Hub" button handler below uses it to
# list models.
api = HfApi()
# Define a function to load a language model
def load_language_model(model_name):
    """Load a text-generation model and its tokenizer.

    The original loaded ``AutoModelForSequenceClassification``, which only
    yields class scores and cannot produce text. A causal language-model head
    is loaded instead so the result can be used for text generation.

    Args:
        model_name: Hub repository id or local path accepted by
            ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Imported locally so this fix does not depend on editing the file-level
    # import block; ``transformers`` is already a dependency of this file.
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer
# Define a function to generate text with a language model
def generate_text(model, tokenizer, prompt, max_new_tokens=50):
    """Generate text for ``prompt`` and return it as a decoded string.

    The original ran a plain forward pass and handed ``outputs.logits[0]``
    (floating-point scores) to ``tokenizer.decode``, which expects token ids.
    This version asks the model to generate token ids and decodes those.

    Args:
        model: A model exposing ``generate(**inputs, max_new_tokens=...)``.
        tokenizer: Callable returning a mapping of model inputs when invoked
            with ``return_tensors="pt"``, and providing ``decode``.
        prompt: The input text.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The first generated sequence decoded with special tokens removed.
        For causal models the sequence typically includes the prompt tokens.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
# Define a function to generate an image with Stable Diffusion
def generate_image(prompt, model_name):
    """Render ``prompt`` with the Stable Diffusion checkpoint ``model_name``.

    The pipeline is loaded with ``StableDiffusionPipeline.from_pretrained``
    on every call and run for 50 inference steps.

    Returns:
        The first image from the pipeline result.
    """
    sd_pipeline = StableDiffusionPipeline.from_pretrained(model_name)
    result = sd_pipeline(prompt, num_inference_steps=50)
    return result.images[0]
# Define a function to generate video or music with other diffusion models
def generate_media(prompt, model_name, media_type):
    """Generate a video or music clip with a diffusion pipeline.

    Args:
        prompt: Text prompt passed to the pipeline.
        model_name: Hub repository id or local path accepted by
            ``DiffusionPipeline.from_pretrained``.
        media_type: Either ``"video"`` or ``"music"``.

    Returns:
        The first item of the pipeline result's ``videos`` attribute (for
        ``"video"``) or ``audios`` attribute (for ``"music"``).

    Raises:
        ValueError: If ``media_type`` is not ``"video"`` or ``"music"``.
            The original fell through both branches and failed with an
            ``UnboundLocalError`` after loading the pipeline.
    """
    # NOTE(review): these attribute names are kept from the original code;
    # confirm that the pipelines used actually expose them.
    output_attrs = {"video": "videos", "music": "audios"}

    # Validate before the expensive model load so bad input fails fast.
    if media_type not in output_attrs:
        raise ValueError(
            f"Unsupported media_type {media_type!r}; expected 'video' or 'music'."
        )

    pipe = DiffusionPipeline.from_pretrained(model_name)
    result = pipe(prompt, num_inference_steps=50)
    return getattr(result, output_attrs[media_type])[0]
# Create a Gradio interface
def _query_hub(tag, limit=20):
    """Return up to ``limit`` Hub model ids matching filter ``tag``.

    The ids are joined one per line so they display cleanly in a Textbox.
    The listing is capped because an unbounded ``list_models`` call may
    iterate over a very large number of repositories.
    """
    matches = api.list_models(filter=tag or None, limit=limit)
    return "\n".join(model.modelId for model in matches)


def _chat(model_name, prompt):
    """Load ``model_name`` and generate a reply to ``prompt``.

    Bridges the two UI text fields to ``generate_text``, which expects an
    already loaded model and tokenizer rather than a model name.
    """
    # NOTE(review): the model is reloaded on every click; consider caching.
    model, tokenizer = load_language_model(model_name)
    return generate_text(model, tokenizer, prompt)


def _route_media(prompt, model_name, media_type):
    """Run ``generate_media`` and return ``(video_value, audio_value)``.

    Only the slot matching ``media_type`` is filled; the other is ``None``.
    """
    output = generate_media(prompt, model_name, media_type)
    if media_type == "video":
        return output, None
    return None, output


def _toggle_media_outputs(media_type):
    """Show only the output component matching the selected media type."""
    return (
        gr.update(visible=media_type == "video"),
        gr.update(visible=media_type == "music"),
    )


with gr.Blocks() as demo:
    with gr.Tab("Chat"):
        with gr.Row():
            language_model_input = gr.Textbox(label="Language Model")
            query_button = gr.Button("Query HuggingFace Hub")
        # Results get their own box so the query no longer overwrites the
        # model name the user typed.
        query_results = gr.Textbox(label="Matching Models", interactive=False)
        chat_input = gr.Textbox(label="Chat Input")
        chat_output = gr.Textbox(label="Chat Output")
        generate_button = gr.Button("Generate Text")
    with gr.Tab("Image Generation"):
        image_input = gr.Textbox(label="Image Prompt")
        image_model_input = gr.Textbox(label="Image Model")
        generate_image_button = gr.Button("Generate Image")
        image_output = gr.Image(label="Generated Image")
    with gr.Tab("Media Generation"):
        media_input = gr.Textbox(label="Media Prompt")
        media_model_input = gr.Textbox(label="Media Model")
        # A default selection avoids submitting ``None`` as the media type.
        media_type_input = gr.Radio(
            label="Media Type", choices=["video", "music"], value="music"
        )
        generate_media_button = gr.Button("Generate Media")
        # The original picked the component with
        # ``media_type_input == "video"``, which compares the Radio object
        # itself to a string at build time and is always False. Both
        # components are created and their visibility follows the Radio.
        video_output = gr.Video(label="Generated Media", visible=False)
        audio_output = gr.Audio(label="Generated Media")

    # Query Hugging Face Hub for language models
    query_button.click(
        fn=_query_hub, inputs=language_model_input, outputs=query_results
    )
    # Generate text with a language model
    generate_button.click(
        fn=_chat, inputs=[language_model_input, chat_input], outputs=chat_output
    )
    # Generate an image with Stable Diffusion
    generate_image_button.click(
        fn=generate_image,
        inputs=[image_input, image_model_input],
        outputs=image_output,
    )
    # Keep the visible output component in sync with the selected media type
    media_type_input.change(
        fn=_toggle_media_outputs,
        inputs=media_type_input,
        outputs=[video_output, audio_output],
    )
    # Generate video or music with other diffusion models
    generate_media_button.click(
        fn=_route_media,
        inputs=[media_input, media_model_input, media_type_input],
        outputs=[video_output, audio_output],
    )

demo.launch()