Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ import io
|
|
21 |
import datasets
|
22 |
|
23 |
import gradio as gr
|
24 |
-
from transformers import
|
25 |
from transformers import Idefics2ForConditionalGeneration
|
26 |
import tempfile
|
27 |
from streaming_stt_nemo import Model
|
@@ -29,15 +29,18 @@ from huggingface_hub import InferenceClient
|
|
29 |
import edge_tts
|
30 |
import asyncio
|
31 |
from transformers import pipeline
|
|
|
|
|
|
|
32 |
|
33 |
-
|
34 |
processor = AutoProcessor.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
|
35 |
|
36 |
-
@spaces.GPU(
|
37 |
-
def
|
38 |
-
inputs = processor(text=[
|
39 |
with torch.inference_mode():
|
40 |
-
output =
|
41 |
**inputs,
|
42 |
do_sample=False,
|
43 |
use_cache=True,
|
@@ -48,9 +51,9 @@ def generate_caption(image, prompt):
|
|
48 |
|
49 |
prompt_len = inputs["input_ids"].shape[1]
|
50 |
decoded_text = processor.batch_decode(output[:, prompt_len:])[0]
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
55 |
theme = gr.themes.Base(
|
56 |
font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
|
@@ -118,24 +121,16 @@ SYSTEM_PROMPT = [
|
|
118 |
{
|
119 |
"type": "text",
|
120 |
"text": """I am OpenGPT 4o, an exceptionally capable and versatile AI assistant meticulously crafted by KingNish. Designed to assist human users through insightful conversations, I aim to provide an unparalleled experience. My key attributes include:
|
121 |
-
|
122 |
- **Intelligence and Knowledge:** I possess an extensive knowledge base, enabling me to offer insightful answers and intelligent responses to User queries. My understanding of complex concepts is exceptional, ensuring accurate and reliable information.
|
123 |
-
|
124 |
- **Image Generation and Perception:** One of my standout features is the ability to generate and perceive images. Utilizing the following link structure, I create unique and contextually rich visuals:
|
125 |
-
|
126 |
> ![](https://image.pollinations.ai/prompt/{StyleofImage}%20{OptimizedPrompt}%20{adjective}%20{charactersDetailed}%20{visualStyle}%20{genre}?width={width}&height={height}&nologo=poll&nofeed=yes&seed={random})
|
127 |
-
|
128 |
For image generation, I replace {info inside curly braces} with specific details according to the User's requirements to create relevant visuals. The width and height parameters are adjusted as needed, often favoring HD dimensions for a superior viewing experience.
|
129 |
-
|
130 |
For instance, if the User requests:
|
131 |
-
|
132 |
[USER] Show me an image of A futuristic cityscape with towering skyscrapers and flying cars.
|
133 |
[OpenGPT 4o] Generating Image you requested:
|
134 |
![](https://image.pollinations.ai/prompt/Photorealistic%20futuristic%20cityscape%20with%20towering%20skyscrapers%20and%20flying%20cars%20in%20the%20year%202154?width=1024&height=768&nologo=poll&nofeed=yes&seed=85432)
|
135 |
-
|
136 |
**Bulk Image Generation with Links:** I excel at generating multiple image links simultaneously, always providing unique links and visuals. I ensure that each image is distinct and captivates the User.
|
137 |
Note: Make sure to always provide image links starting with !, as given in the examples.
|
138 |
-
|
139 |
**Engaging Conversations:** While my image generation skills are impressive, I also excel at natural language processing. I can engage in captivating conversations, offering informative and entertaining responses to the User.
|
140 |
**Reasoning, Memory, and Identification:** My reasoning skills are exceptional, allowing me to make logical connections. My memory capabilities are vast, enabling me to retain context and provide consistent responses. I can identify people and objects within images or text, providing relevant insights and details.
|
141 |
**Attention to Detail:** I am attentive to the smallest details, ensuring that my responses and generated content are of the highest quality. I strive to provide a refined and polished experience.
|
@@ -385,8 +380,6 @@ def model_inference(
|
|
385 |
if acc_text.endswith("<end_of_utterance>"):
|
386 |
acc_text = acc_text[:-18]
|
387 |
yield acc_text
|
388 |
-
print("Success - generated the following text:", acc_text)
|
389 |
-
print("-----")
|
390 |
|
391 |
|
392 |
FEATURES = datasets.Features(
|
@@ -542,15 +535,13 @@ with gr.Blocks() as voice2:
|
|
542 |
outputs=[output], live=True)
|
543 |
|
544 |
with gr.Blocks() as video:
|
545 |
-
gr.Markdown(" ## Live Chat")
|
546 |
-
gr.Markdown("### Click camera option to update image")
|
547 |
gr.Interface(
|
548 |
-
fn=
|
549 |
inputs=[gr.Image(type="pil", label="Upload Image"), gr.Textbox(label="Prompt", value="what he is doing")],
|
550 |
-
outputs=gr.Textbox(label="Answer")
|
551 |
)
|
552 |
|
553 |
-
with gr.Blocks(theme=theme,
|
554 |
gr.Markdown("# OpenGPT 4o")
|
555 |
gr.TabbedInterface([img, voice, video, voice2], ['💬 SuperChat','🗣️ Voice Chat','📸 Live Chat', '🗣️ Voice Chat 2'])
|
556 |
|
|
|
21 |
import datasets
|
22 |
|
23 |
import gradio as gr
|
24 |
+
from transformers import TextIteratorStreamer
|
25 |
from transformers import Idefics2ForConditionalGeneration
|
26 |
import tempfile
|
27 |
from streaming_stt_nemo import Model
|
|
|
29 |
import edge_tts
|
30 |
import asyncio
|
31 |
from transformers import pipeline
|
32 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
33 |
+
from transformers import AutoModel
|
34 |
+
from transformers import AutoProcessor
|
35 |
|
36 |
+
model3 = AutoModel.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
|
37 |
processor = AutoProcessor.from_pretrained("unum-cloud/uform-gen2-dpo", trust_remote_code=True)
|
38 |
|
39 |
+
@spaces.GPU(queue=False)
|
40 |
+
def videochat(image3, prompt3):
|
41 |
+
inputs = processor(text=[prompt3], images=[image3], return_tensors="pt")
|
42 |
with torch.inference_mode():
|
43 |
+
output = model3.generate(
|
44 |
**inputs,
|
45 |
do_sample=False,
|
46 |
use_cache=True,
|
|
|
51 |
|
52 |
prompt_len = inputs["input_ids"].shape[1]
|
53 |
decoded_text = processor.batch_decode(output[:, prompt_len:])[0]
|
54 |
+
if decoded_text.endswith("<|im_end|>"):
|
55 |
+
decoded_text = decoded_text[:-18]
|
56 |
+
yield acc_text
|
57 |
|
58 |
theme = gr.themes.Base(
|
59 |
font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
|
|
|
121 |
{
|
122 |
"type": "text",
|
123 |
"text": """I am OpenGPT 4o, an exceptionally capable and versatile AI assistant meticulously crafted by KingNish. Designed to assist human users through insightful conversations, I aim to provide an unparalleled experience. My key attributes include:
|
|
|
124 |
- **Intelligence and Knowledge:** I possess an extensive knowledge base, enabling me to offer insightful answers and intelligent responses to User queries. My understanding of complex concepts is exceptional, ensuring accurate and reliable information.
|
|
|
125 |
- **Image Generation and Perception:** One of my standout features is the ability to generate and perceive images. Utilizing the following link structure, I create unique and contextually rich visuals:
|
|
|
126 |
> ![](https://image.pollinations.ai/prompt/{StyleofImage}%20{OptimizedPrompt}%20{adjective}%20{charactersDetailed}%20{visualStyle}%20{genre}?width={width}&height={height}&nologo=poll&nofeed=yes&seed={random})
|
|
|
127 |
For image generation, I replace {info inside curly braces} with specific details according to the User's requirements to create relevant visuals. The width and height parameters are adjusted as needed, often favoring HD dimensions for a superior viewing experience.
|
|
|
128 |
For instance, if the User requests:
|
|
|
129 |
[USER] Show me an image of A futuristic cityscape with towering skyscrapers and flying cars.
|
130 |
[OpenGPT 4o] Generating Image you requested:
|
131 |
![](https://image.pollinations.ai/prompt/Photorealistic%20futuristic%20cityscape%20with%20towering%20skyscrapers%20and%20flying%20cars%20in%20the%20year%202154?width=1024&height=768&nologo=poll&nofeed=yes&seed=85432)
|
|
|
132 |
**Bulk Image Generation with Links:** I excel at generating multiple image links simultaneously, always providing unique links and visuals. I ensure that each image is distinct and captivates the User.
|
133 |
Note: Make sure to always provide image links starting with !, as given in the examples.
|
|
|
134 |
**Engaging Conversations:** While my image generation skills are impressive, I also excel at natural language processing. I can engage in captivating conversations, offering informative and entertaining responses to the User.
|
135 |
**Reasoning, Memory, and Identification:** My reasoning skills are exceptional, allowing me to make logical connections. My memory capabilities are vast, enabling me to retain context and provide consistent responses. I can identify people and objects within images or text, providing relevant insights and details.
|
136 |
**Attention to Detail:** I am attentive to the smallest details, ensuring that my responses and generated content are of the highest quality. I strive to provide a refined and polished experience.
|
|
|
380 |
if acc_text.endswith("<end_of_utterance>"):
|
381 |
acc_text = acc_text[:-18]
|
382 |
yield acc_text
|
|
|
|
|
383 |
|
384 |
|
385 |
FEATURES = datasets.Features(
|
|
|
535 |
outputs=[output], live=True)
|
536 |
|
537 |
with gr.Blocks() as video:
|
|
|
|
|
538 |
gr.Interface(
|
539 |
+
fn=videochat,
|
540 |
inputs=[gr.Image(type="pil", label="Upload Image"), gr.Textbox(label="Prompt", value="what he is doing")],
|
541 |
+
outputs=gr.Textbox(label="Answer")
|
542 |
)
|
543 |
|
544 |
+
with gr.Blocks(theme=theme, title="OpenGPT 4o DEMO") as demo:
|
545 |
gr.Markdown("# OpenGPT 4o")
|
546 |
gr.TabbedInterface([img, voice, video, voice2], ['💬 SuperChat','🗣️ Voice Chat','📸 Live Chat', '🗣️ Voice Chat 2'])
|
547 |
|