import torch
from diffusers import DiffusionPipeline
import gradio as gr
import numpy as np
import openai
import os
import spaces
import base64
# Setup logging
# logging.basicConfig(level=logging.DEBUG)
# logger = logging.getLogger(__name__)

# Retrieve the OpenAI API key from the environment
API_KEY = os.getenv('OPEN_AI_API_KEY')
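# Optional fail-fast guard (not in the original): surface a clear error at
# startup instead of an opaque OpenAI auth failure on the first request.
# The env var name matches the lookup above.
if API_KEY is None:
    raise RuntimeError("OPEN_AI_API_KEY is not set; add it as a Space secret.")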
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Book-Reader</h1>
<p style="text-align: center;">This Space runs the Stable Diffusion XL pipeline from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a></p>
<p style="text-align: center;">For instructions on how to use the models, <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
</div>
'''
# Load both the base and refiner pipelines
base = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0",
                                         torch_dtype=torch.float16,
                                         use_safetensors=True,
                                         variant="fp16").to("cuda:0")
refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0",
                                            text_encoder_2=base.text_encoder_2,
                                            vae=base.vae,
                                            torch_dtype=torch.float16,
                                            use_safetensors=True,
                                            variant="fp16").to("cuda:0")
chat_mode = {}

def encode_image(image_path):
    """Read an image file and return its contents as a base64 string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
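# Note: the string returned by encode_image() is embedded in a data: URL
# (data:image/jpeg;base64,...) when it is sent to GPT-4o in generation() below.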
def generation(message, history):
    """
    Generates a response based on the input message and, optionally, an image.
    """
    global chat_mode
    image_path = None
    # Prefer an image attached to the current message; otherwise fall back to
    # the most recent image found in the chat history.
    if "files" in message and message["files"]:
        if isinstance(message["files"][-1], dict):
            image_path = message["files"][-1]["path"]
        else:
            image_path = message["files"][-1]
    else:
        for hist in history:
            if isinstance(hist[0], tuple):
                image_path = hist[0][0]

    input_prompt = message if isinstance(message, str) else message.get("text", "")

    if image_path is None:
        # Text-only request: stream a reply from GPT-3.5 Turbo
        chat_mode["mode"] = "text"
        client = openai.OpenAI(api_key=API_KEY)
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                      {"role": "user", "content": input_prompt}],
            stream=True,
        )
        return stream
    else:
        # An image is present: stream a reply from GPT-4o with the image attached
        chat_mode["mode"] = "image"
        base64_image = encode_image(image_path=image_path)
        client = openai.OpenAI(api_key=API_KEY)
        stream = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                      {"role": "user", "content": [
                          {"type": "text", "text": input_prompt},
                          {"type": "image_url", "image_url": {
                              "url": f"data:image/jpeg;base64,{base64_image}"
                          }}
                      ]}],
            stream=True,
        )
        return stream
# Takes a text prompt and generates an image with the base + refiner pipelines
@spaces.GPU  # presumably the intended use of the `spaces` import: allocate a GPU per call on ZeroGPU hardware
def diffusing(prompt: str,
              n_steps: int,
              denoising: float):
    """
    Runs the base pipeline for the first `denoising` fraction of `n_steps`,
    then hands the latents to the refiner for the remaining steps.
    """
    # Base model: stop at the `denoising` fraction and output latents
    image_base = base(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_end=denoising,
        output_type="latent"
    ).images
    # Refiner: resume from the same point and decode the final image
    image = refiner(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_start=denoising,
        image=image_base
    ).images[0]
    return image
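# Example invocation (hypothetical prompt and values), matching the slider
# defaults in the UI below:
#   img = diffusing("a watercolor lighthouse at dusk", n_steps=40, denoising=0.8)
#   img.save("lighthouse.png")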
def check_cuda_availability():
    if torch.cuda.is_available():
        return f"GPU: {torch.cuda.get_device_name(0)}"
    else:
        return "No CUDA device found."
# Image created from diffusing
image_created = {}
def bot_comms(message, history):
    """
    Handles communication between Gradio and the models.
    """
    # Ensure the message is a dictionary
    if not isinstance(message, dict):
        message = {"text": message}

    # Typing "check cuda" reports which GPU (if any) the Space is running on
    if message["text"] == "check cuda":
        yield check_cuda_availability()
        return

    gpt_outputs = []
    stream = generation(message, history)
    for chunk in stream:
        text = chunk.choices[0].delta.content
        if text:
            gpt_outputs.append(text)
            # Yield the accumulated text so Gradio re-renders the growing reply,
            # producing token-by-token streaming in the chat window
            yield "".join(gpt_outputs)
# Multimodal textbox: users can attach images alongside text; attachments
# arrive in message["files"]
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], placeholder="Enter your question or upload an image.", show_label=False)
with gr.Blocks(fill_height=True) as demo:
    with gr.Row():
        # Diffusing
        with gr.Column():
            gr.Markdown(DESCRIPTION)
            image_prompt = gr.Textbox(label="Image Prompt")
            output_image = gr.Image(label="Generated Image")
            generate_image_button = gr.Button("Generate Image")
            with gr.Accordion(label="⚙️ Parameters", open=False):
                steps_slider = gr.Slider(
                    minimum=20,
                    maximum=100,
                    step=1,
                    value=40,
                    label="Number of Inference Steps"
                )
                denoising_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.8,
                    label="High Noise Fraction"
                )
            generate_image_button.click(
                fn=diffusing,
                inputs=[image_prompt, steps_slider, denoising_slider],
                outputs=output_image
            )
        with gr.Column():
            # GPT-3.5
            gr.Markdown('''
            <div>
            <h1 style="text-align: center;">Smart Reader</h1>
            <p style="text-align: center;">This contains a generative LLM from <a href="https://openai.com/"><b>OpenAI</b></a>: GPT-3.5 Turbo for text, with GPT-4o handling image inputs.</p>
            <p style="text-align: center;">For instructions on how to use the models, <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
            </div>
            ''')
            chat = gr.ChatInterface(fn=bot_comms,
                                    multimodal=True,
                                    textbox=chat_input)

demo.launch()