|
import requests |
|
import os, sys, json |
|
import gradio as gr |
|
import openai |
|
from openai import OpenAI |
|
import time |
|
import re |
|
import io |
|
from PIL import Image, ImageDraw, ImageOps, ImageFont |
|
import base64 |
|
|
|
from tavily import TavilyClient |
|
|
|
from langchain.chains import LLMChain, RetrievalQA |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.document_loaders import PyPDFLoader, WebBaseLoader, UnstructuredWordDocumentLoader, DirectoryLoader |
|
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader |
|
from langchain.document_loaders.generic import GenericLoader |
|
from langchain.document_loaders.parsers import OpenAIWhisperParser |
|
from langchain.schema import AIMessage, HumanMessage |
|
from langchain.llms import HuggingFaceHub |
|
from langchain.llms import HuggingFaceTextGenInference |
|
from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings |
|
|
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.prompts import PromptTemplate |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.vectorstores import Chroma |
|
from chromadb.errors import InvalidDimensionException |
|
from utils import * |
|
from beschreibungen import * |
|
|
|
|
|
|
|
|
|
|
|
from dotenv import load_dotenv, find_dotenv |
|
# Load environment variables (API keys) from a .env file, if one is found.
_ = load_dotenv(find_dotenv())

# Module-wide flag: True once the RAG documents have been loaded, split and
# stored in the vector store (set by generate_auswahl / generate_text).
splittet = False

# API credentials, read from the environment.
HUGGINGFACEHUB_API_TOKEN = os.getenv("HF_ACCESS_READ")
OAI_API_KEY = os.getenv("OPENAI_API_KEY")
HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
TAVILY_KEY = os.getenv("TAVILY_KEY")

# OpenAI models: chat completion and vision.
MODEL_NAME = "gpt-4-1106-preview"
MODEL_NAME_IMAGE = "gpt-4-vision-preview"

# HuggingFace Hub repository used for the "HuggingFace" model option.
repo_id = "HuggingFaceH4/zephyr-7b-alpha"

MODEL_NAME_HF = "mistralai/Mixtral-8x7B-Instruct-v0.1"
MODEL_NAME_OAI_ZEICHNEN = "dall-e-3"

# HuggingFace inference endpoint used for Stable Diffusion image generation.
API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"

# os.environ only accepts strings: assigning None (token not configured)
# would raise a TypeError at import time and take the whole app down, even
# when only the OpenAI models are used.
if HUGGINGFACEHUB_API_TOKEN is not None:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

# OpenAI client plus the assistants that are created once at start-up.
# `template` and `openai_assistant_suche` come from the star imports above.
client = OpenAI()
general_assistant_file = client.beta.assistants.create(
    name="File Analysator",
    instructions=template,
    model=MODEL_NAME,
)
thread_file = client.beta.threads.create()
# NOTE(review): create_assistant_suche() calls openai_assistant_suche with an
# additional `template` argument - confirm which call matches the helper.
general_assistant_suche = openai_assistant_suche(client)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def clear_all():
    """Reset handler for the "Neue Session" button of the chatbot tab.

    Returns:
        A 3-tuple wired in the GUI to ``attached_file``, ``image_display``
        and ``history``: no attached file, a hidden image preview and an
        empty history list.
    """
    hidden_preview = gr.Image(visible=False)
    empty_history = []
    return None, hidden_preview, empty_history
|
|
|
|
|
|
|
def add_text(chatbot, history, prompt, file):
    """Append the user's prompt (and attached file, if any) to the chat display.

    Args:
        chatbot: Current chat display as a list of (user, bot) pairs.
        history: Conversation history; passed through unchanged.
        prompt: Text entered by the user.
        file: Attached file object exposing ``.name``, or ``None``.

    Returns:
        ``(chatbot, history, prompt, file, image, textbox_value)`` where
        ``image`` is a hidden ``gr.Image`` and ``textbox_value`` is ``""`` so
        the input textbox is cleared.
    """
    if file is None:
        new_entries = [(prompt, None)]
    elif prompt == "":
        # A file without a prompt cannot be processed; tell the user.
        new_entries = [((file.name,), "Prompt fehlt!")]
    else:
        new_entries = [((file.name,), None), (prompt, None)]

    # Build a new list instead of mutating the one handed in.
    chatbot = chatbot + new_entries
    return chatbot, history, prompt, file, gr.Image(visible=False), ""
|
|
|
def add_text2(chatbot, prompt):
    """Append the drawing-tab prompt to its chat display.

    An empty prompt is answered directly with "Prompt fehlt!".

    Args:
        chatbot: Current chat display as a list of (user, bot) pairs.
        prompt: Text entered by the user.

    Returns:
        ``(chatbot, prompt, "")`` - the extended chat list, the prompt and an
        empty string that clears the input textbox.
    """
    new_entry = (prompt, None) if prompt != "" else ("", "Prompt fehlt!")
    # Concatenate into a fresh list; the caller's list stays untouched.
    chatbot = chatbot + [new_entry]
    print("chatbot nach add_text............")
    print(chatbot)
    return chatbot, prompt, ""
|
|
|
|
|
|
|
def file_anzeigen(file):
    """Show a small thumbnail for a freshly uploaded file.

    Image files are previewed directly; every other file type is represented
    by the generic icon ``data/file.png``.

    Args:
        file: The uploaded file as delivered by the upload button.

    Returns:
        ``(image_component, preview_source, file)`` - a 47x47 visible
        ``gr.Image``, the source to display in it, and the file itself
        (stored as the attached file by the GUI wiring).
    """
    image_extensions = ("png", "PNG", "jpg", "jpeg", "JPG", "JPEG")
    is_image = analyze_file(file) in image_extensions
    preview_source = file if is_image else "data/file.png"

    thumbnail = gr.Image(
        width=47,
        visible=True,
        interactive=False,
        height=47,
        min_width=47,
        show_label=False,
        show_share_button=False,
        show_download_button=False,
        scale=0.5,
    )
    return thumbnail, preview_source, file
|
|
|
def file_loeschen():
    """Drop the attached file and hide its thumbnail.

    Returns:
        ``(None, image)`` where ``image`` is a hidden ``gr.Image``; wired in
        the GUI to ``attached_file`` and ``image_display``.
    """
    hidden_preview = gr.Image(visible=False)
    return None, hidden_preview
|
|
|
|
|
|
|
def cancel_outputing():
    """Handler for the "Stop" button; returns the status text "Stop Done".

    The running events themselves are cancelled by Gradio through the
    ``cancels=`` argument of the button's click listener. The previous
    ``reset_textbox()`` call was removed: its return value was discarded and
    it has no side effects, so it did nothing.
    """
    return "Stop Done"
|
|
|
def reset_textbox():
    """Produce the values that clear the input textbox and the status.

    Returns:
        ``(update, "")`` - a Gradio update that sets the textbox value to the
        empty string, and an empty status string.
    """
    cleared_textbox = gr.update(value="")
    return cleared_textbox, ""
|
|
|
|
|
|
|
|
|
def umwandeln_fuer_anzeige(image):
    """Serialize an image to PNG and return the raw bytes.

    Args:
        image: Object with a PIL-style ``save(fp, format=...)`` method.

    Returns:
        The PNG-encoded image as ``bytes``.
    """
    with io.BytesIO() as png_stream:
        image.save(png_stream, format='PNG')
        return png_stream.getvalue()
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_image(image_path, prompt, max_tokens=300):
    """Build headers and JSON payload for a vision chat-completion request.

    The image is read from disk and embedded as a base64 data URL. The MIME
    type of the data URL is derived from the file name (previously it was
    always declared as JPEG, even for PNG uploads); unknown or non-image
    types fall back to ``image/jpeg``.

    Args:
        image_path: Path of the image file to send.
        prompt: User prompt; ``llm_template`` is prepended to it.
        max_tokens: Upper bound for the length of the model's answer.

    Returns:
        ``(headers, payload)`` ready to be passed to ``requests.post``.
    """
    import mimetypes

    with open(image_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode('utf-8')

    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OAI_API_KEY}",
    }
    payload = {
        "model": MODEL_NAME_IMAGE,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": llm_template + prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{encoded_string}",
                        },
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
    }
    return headers, payload
|
|
|
|
|
|
|
def create_assistant_file(prompt, file):
    """Answer a prompt about an uploaded document via the file assistant.

    The file is uploaded to OpenAI, attached to the module-level
    ``general_assistant_file`` assistant, and the prompt is run in a new
    thread.

    Args:
        prompt: The question to ask about the document.
        file: Path of the document to upload.

    Returns:
        The text of the response message.
    """
    # The context manager guarantees the handle is closed after the upload;
    # the previous bare open() leaked one file descriptor per request.
    with open(file, "rb") as file_handle:
        file_neu = client.files.create(file=file_handle, purpose="assistants")

    updated_assistant = client.beta.assistants.update(
        general_assistant_file.id,
        tools=[{"type": "code_interpreter"}, {"type": "retrieval"}],
        file_ids=[file_neu.id],
    )
    thread_file, run = create_thread_and_run(prompt, client, updated_assistant.id)
    wait_on_run(run, thread_file, client)
    response = get_response(thread_file, client, updated_assistant.id)
    # NOTE(review): index 1 presumably selects the assistant's reply; this
    # depends on the ordering returned by get_response - confirm in utils.
    return response.data[1].content[0].text.value
|
|
|
|
|
|
|
def create_assistant_suche(prompt):
    """Answer a prompt with the search assistant.

    A search assistant is created through ``openai_assistant_suche``, the
    prompt is run in a new thread and the response text is returned.

    Args:
        prompt: The prompt to send to the assistant.

    Returns:
        The text of the response message.
    """
    # NOTE(review): the module-level set-up calls openai_assistant_suche with
    # the client only - confirm that the helper accepts `template` as well.
    search_assistant = openai_assistant_suche(client, template)
    assistant_id = search_assistant.id

    thread_suche, pending_run = create_thread_and_run(prompt, client, assistant_id)
    wait_on_run(pending_run, thread_suche, client)

    messages = get_response(thread_suche, client, assistant_id)
    reply = messages.data[1]
    return reply.content[0].text.value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_auswahl(prompt, file, chatbot, history, rag_option, model_option, openai_api_key, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3,):
    """Dispatch a chat request and stream the answer into the chat display.

    Depending on the attachment, the request goes to ``generate_text`` (no
    file), ``generate_text_zu_bild`` (image file) or ``generate_text_zu_doc``
    (any other file). The answer is then emitted character by character.

    Args:
        prompt: The user's prompt.
        file: Attached file, or ``None``.
        chatbot: Chat display; its last entry receives the answer.
        history: Conversation history, extended with this exchange.
        rag_option: ``"An"`` enables retrieval augmented generation.
        model_option: Model selection, forwarded to ``generate_text``.
        openai_api_key: API key entered in the GUI.
        k: Number of document chunks to retrieve.
        top_p, temperature, max_new_tokens, max_context_length_tokens,
        repetition_penalty: Generation settings, forwarded to
            ``generate_text``.

    Yields:
        ``(chatbot, history, None, status)`` after every character; the
        ``None`` is wired to the attached file and therefore clears it.
    """
    global splittet

    # Always bind db. Previously it was unbound (or reset to None) on every
    # RAG request after the first one, because the store was only opened
    # while the documents were being split.
    db = None
    if rag_option == "An":
        if not splittet:
            # Loading and splitting only has to happen once.
            splits = document_loading_splitting()
            document_storage_chroma(splits)
        # NOTE(review): assumes document_retrieval_chroma2() can reopen the
        # already populated store on later calls - confirm in utils.
        db = document_retrieval_chroma2()
        splittet = True
    else:
        splittet = False

    if file is None:
        # Forward the caller's settings instead of hard-coded literals.
        result = generate_text(
            prompt, chatbot, history, rag_option, model_option, openai_api_key, db,
            k=k,
            top_p=top_p,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            max_context_length_tokens=max_context_length_tokens,
            repetition_penalty=repetition_penalty,
        )
        history = history + [(prompt, result)]
    else:
        ext = analyze_file(file)
        if ext in ("png", "PNG", "jpg", "jpeg", "JPG", "JPEG"):
            result = generate_text_zu_bild(file, prompt, k, rag_option, chatbot, db)
        else:
            result = generate_text_zu_doc(file, prompt, k, rag_option, chatbot, db)
        history = history + [((file,), None), (prompt, result)]

    # Stream the answer character by character.
    chatbot[-1][1] = ""
    for character in result:
        chatbot[-1][1] += character
        time.sleep(0.03)
        yield chatbot, history, None, "Generating"
        if shared_state.interrupted:
            shared_state.recover()
            # Report the stop and end the stream. The former bare
            # `except: pass` around this yield swallowed GeneratorExit and
            # the loop kept streaming after the interruption.
            yield chatbot, history, None, "Stop: Success"
            return
|
|
|
|
|
|
|
def generate_bild(prompt, chatbot, model_option_zeichnen='HuggingFace', temperature=0.5, max_new_tokens=4048,top_p=0.6, repetition_penalty=1.3):
    """Generate an image for the prompt and embed it in the chat display.

    Args:
        prompt: Description of the image to draw.
        chatbot: Chat display; its last entry receives the image as an
            inline ``<img>`` tag with a base64 PNG data URL.
        model_option_zeichnen: ``"Stable Diffusion"`` uses the HuggingFace
            inference endpoint; any other value uses the OpenAI image model.
        temperature, max_new_tokens, top_p, repetition_penalty: Accepted for
            interface compatibility; not used in this function.

    Returns:
        ``(chatbot, status)``.

    Raises:
        gr.Error: If the HuggingFace endpoint answers with an HTTP error.
    """
    if prompt == "":
        # add_text2 already answered with "Prompt fehlt!"; do not overwrite
        # that message or send an empty prompt to a paid API.
        return chatbot, "Prompt fehlt!"

    if model_option_zeichnen == "Stable Diffusion":
        print("Bild Erzeugung HF..............................")
        response = requests.post(API_URL, headers=HEADERS, json={"inputs": prompt})
        print("fertig Bild")
        if not response.ok:
            # Error answers are JSON, not image bytes; Image.open would fail
            # with an unrelated "cannot identify image file" error.
            raise gr.Error(
                f"Bilderzeugung fehlgeschlagen (HTTP {response.status_code}): {response.text[:200]}"
            )
        image = Image.open(io.BytesIO(response.content))
        image_b64 = base64.b64encode(umwandeln_fuer_anzeige(image)).decode('utf-8')
    else:
        print("Bild Erzeugung DallE..............................")
        response = client.images.generate(
            model=MODEL_NAME_OAI_ZEICHNEN,
            prompt=prompt,
            size="1024x1024",
            quality="standard",
            n=1,
            response_format='b64_json',
        )
        image_b64 = response.data[0].b64_json

    chatbot[-1][1] = "<img src='data:image/png;base64,{0}'/>".format(image_b64)
    return chatbot, "Success"
|
|
|
|
|
|
|
|
|
def generate_text_zu_bild(file, prompt, k, rag_option, chatbot, db):
    """Answer a prompt about an uploaded image with the vision model.

    Args:
        file: Path of the image file.
        prompt: The user's prompt.
        k: Number of document chunks to retrieve when RAG is enabled.
        rag_option: ``"An"`` enriches the prompt via ``rag_chain2`` and the
            chat history before it is sent.
        chatbot: Chat display, used to build the prompt with history.
        db: Vector store handle passed to ``rag_chain2``.

    Returns:
        The text content of the model's answer.

    Raises:
        gr.Error: If the API response contains no choices (for example an
            error object).
    """
    print("Text mit Bild ..............................")
    print(file)

    prompt_neu = prompt
    if rag_option == "An":
        print("Bild mit RAG..............................")
        neu_text_mit_chunks = rag_chain2(prompt, db, k)
        prompt_neu = generate_prompt_with_history(neu_text_mit_chunks, chatbot)

    headers, payload = process_image(file, prompt_neu)
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    data = response.json()

    # Failed requests carry an "error" object instead of "choices"; surface
    # its message instead of failing with a bare KeyError.
    if not data.get("choices"):
        error = data.get("error")
        message = error.get("message") if isinstance(error, dict) else str(data)
        raise gr.Error(f"OpenAI-Anfrage fehlgeschlagen: {message}")

    return data['choices'][0]['message']['content']
|
|
|
|
|
|
|
|
|
def generate_text_zu_doc(file, prompt, k, rag_option, chatbot, db):
    """Answer a prompt about an uploaded document.

    With RAG enabled the prompt is first enriched via ``rag_chain2`` and the
    chat history; the resulting prompt and the file are then handed to
    ``create_assistant_file``.

    Args:
        file: Path of the document.
        prompt: The user's prompt.
        k: Number of document chunks to retrieve when RAG is enabled.
        rag_option: ``"An"`` enables retrieval augmented generation.
        chatbot: Chat display, used to build the prompt with history.
        db: Vector store handle passed to ``rag_chain2``.

    Returns:
        The answer text returned by ``create_assistant_file``.
    """
    print("text mit doc ..............................")

    if rag_option != "An":
        return create_assistant_file(prompt, file)

    print("Doc mit RAG..............................")
    text_with_chunks = rag_chain2(prompt, db, k)
    enriched_prompt = generate_prompt_with_history(text_with_chunks, chatbot)
    return create_assistant_file(enriched_prompt, file)
|
|
|
|
|
|
|
|
|
|
|
def generate_text (prompt, chatbot, history, rag_option, model_option, openai_api_key, db, k=3, top_p=0.6, temperature=0.5, max_new_tokens=4048, max_context_length_tokens=2048, repetition_penalty=1.3,):
    """Answer a plain-text prompt, optionally with RAG and a web-search fallback.

    Args:
        prompt: The user's prompt; must not be empty.
        chatbot: Chat display, used to build the prompt with history.
        history: Conversation history; not used in this function.
        rag_option: ``"An"`` routes the request through ``rag_chain``;
            must not be ``None``.
        model_option: ``"OpenAI"`` selects ChatOpenAI, anything else the
            HuggingFace Hub model.
        openai_api_key: Key from the GUI; empty or ``"sk-"`` falls back to
            the key from the environment.
        db: Vector store handle used when RAG is enabled.
        temperature: Sampling temperature for the OpenAI model.
        k, top_p, max_new_tokens, max_context_length_tokens,
        repetition_penalty: Accepted for interface compatibility; not used
            in this function.

    Returns:
        The answer text.

    Raises:
        gr.Error: On missing input or when any step of the request fails.
    """
    global splittet
    print("Text pur..............................")

    if not openai_api_key or openai_api_key == "sk-":
        # No usable key entered in the GUI: use the one from the environment.
        openai_api_key = OAI_API_KEY
    if rag_option is None:
        raise gr.Error("Retrieval Augmented Generation ist erforderlich.")
    if prompt == "":
        raise gr.Error("Prompt ist erforderlich.")

    try:
        if model_option == "OpenAI":
            print("OpenAI Anfrage.......................")
            llm = ChatOpenAI(model_name = MODEL_NAME, openai_api_key = openai_api_key, temperature=temperature)
            if rag_option == "An":
                history_text_und_prompt = generate_prompt_with_history(prompt, chatbot)
            else:
                history_text_und_prompt = generate_prompt_with_history_openai(prompt, chatbot)
        else:
            print("HF Anfrage.......................")
            llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_length": 128})
            print("HF")
            history_text_und_prompt = generate_prompt_with_history(prompt, chatbot)

        if rag_option == "An":
            print("LLM aufrufen mit RAG: ...........")
            result = rag_chain(llm, history_text_und_prompt, db)
        else:
            splittet = False
            print("LLM aufrufen ohne RAG: ...........")
            result = llm_chain(llm, history_text_und_prompt)

        # The model signals "I don't know" with this exact sentence; fall
        # back to the search assistant in that case.
        if result == "Ich weiß es nicht.":
            # Fixed: the stray quote in this statement was a syntax error
            # that prevented the whole module from being imported.
            print("Suche im Netz: ...........")
            result = create_assistant_suche(history_text_und_prompt)

    except Exception as e:
        # Surface the failure in the GUI and keep the original traceback.
        raise gr.Error(str(e)) from e

    return result
|
|
|
|
|
|
|
################################################ |
|
#GUI |
|
############################################### |
|
#Beschreibung oben in GUI |
|
################################################ |
|
|
|
#css = """.toast-wrap { display: none !important } """ |
|
#examples=[['Was ist ChtGPT-4?'],['schreibe ein Python Programm, dass die GPT-4 API aufruft.']] |
|
|
|
def vote(data: gr.LikeData):
    """Log whether the user up- or down-voted a response.

    f-strings are used so that a non-string ``data.value`` is logged instead
    of raising a ``TypeError`` on string concatenation.
    """
    if data.liked:
        print(f"You upvoted this response: {data.value}")
    else:
        print(f"You downvoted this response: {data.value}")
|
|
|
|
|
print("Start GUIneu")

# Custom stylesheet handed to gr.Blocks below.
with open("custom.css", encoding="utf-8") as f:
    customCSS = f.read()

# Additional inputs for tab 2.
additional_inputs = [
    gr.Slider(
        label="Temperature",
        value=0.65,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Höhere Werte erzeugen diversere Antworten",
        visible=True,
    ),
    gr.Slider(
        label="Max new tokens",
        value=1024,
        minimum=0,
        maximum=4096,
        step=64,
        interactive=True,
        info="Maximale Anzahl neuer Tokens",
        visible=True,
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.6,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Höhere Werte verwenden auch Tokens mit niedrigerer Wahrscheinlichkeit.",
        visible=True,
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Strafe für wiederholte Tokens",
        visible=True,
    ),
]
|
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the statement order - confirm against the intended layout.
with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
    # Session variables that keep values even after the GUI fields have
    # been emptied again.
    # History stored in parallel to the chatbot - currently not in use, but
    # passed through all relevant functions.
    history = gr.State([])
    # Keeps the prompt available for predict_args after the textbox has
    # been cleared.
    user_question = gr.State("")
    # Same for the drawing tab (predict_args2).
    user_question2 = gr.State("")
    attached_file = gr.State(None)
    status_display = gr.State("")
    status_display2 = gr.State("")

    ################################################
    # Tab: chatbot with text or image input
    ################################################
    gr.Markdown(description_top)
    with gr.Tab("LI Chatbot"):
        #with gr.Row():
            #gr.HTML("LI Chatot")
            #status_display = gr.Markdown("Success", visible = False, elem_id="status_display")
        with gr.Row():
            with gr.Column(scale=5):
                with gr.Row():
                    chatbot = gr.Chatbot(elem_id="li-chat", show_copy_button=True)
                with gr.Row():
                    with gr.Column(scale=12):
                        user_input = gr.Textbox(
                            show_label=False, placeholder="Gib hier deinen Prompt ein...",
                            container=False
                        )
                    with gr.Column(min_width=70, scale=1):
                        submitBtn = gr.Button("Senden")
                    with gr.Column(min_width=70, scale=1):
                        cancelBtn = gr.Button("Stop")
                with gr.Row():
                    #file_display = gr.File(visible=False)
                    image_display = gr.Image(visible=False)
                    # NOTE(review): Gradio documents extension filters with a
                    # leading dot (".pdf") - confirm these entries filter
                    # as intended.
                    upload = gr.UploadButton("📁", file_types=["image", "pdf", "docx", "pptx", "xlsx"], scale=10)
                    emptyBtn = gr.ClearButton([user_input, chatbot, history, attached_file, image_display], value="🧹 Neue Session", scale=10)

            with gr.Column():
                with gr.Column(min_width=50, scale=1):
                    with gr.Tab(label="Parameter Einstellung"):
                        #gr.Markdown("# Parameters")
                        rag_option = gr.Radio(["Aus", "An"], label="LI Erweiterungen (RAG)", value="Aus")
                        model_option = gr.Radio(["OpenAI", "HuggingFace"], label="Modellauswahl", value="OpenAI")

                        top_p = gr.Slider(
                            minimum=0,
                            maximum=1.0,
                            value=0.95,
                            step=0.05,
                            interactive=True,
                            label="Top-p",
                            visible=False,
                        )
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=2.0,
                            value=0.5,
                            step=0.1,
                            interactive=True,
                            label="Temperature",
                            visible=False
                        )
                        max_length_tokens = gr.Slider(
                            minimum=0,
                            maximum=512,
                            value=512,
                            step=8,
                            interactive=True,
                            label="Max Generation Tokens",
                            visible=False,
                        )
                        max_context_length_tokens = gr.Slider(
                            minimum=0,
                            maximum=4096,
                            value=2048,
                            step=128,
                            interactive=True,
                            label="Max History Tokens",
                            visible=False,
                        )
                        repetition_penalty = gr.Slider(label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Strafe für wiederholte Tokens", visible=False)
                        anzahl_docs = gr.Slider(label="Anzahl Dokumente", value=3, minimum=1, maximum=10, step=1, interactive=True, info="wie viele Dokumententeile aus dem Vektorstore an den prompt gehängt werden", visible=False)
                        openai_key = gr.Textbox(label="OpenAI API Key", value="sk-", lines=1, visible=False)

    ################################################
    # Tab: drawing with Stable Diffusion / DallE
    ################################################
    with gr.Tab("LI Zeichnen"):
        #with gr.Row():
            #gr.HTML("LI Zeichnen mit KI")
            #status_display2 = gr.Markdown("Success", visible = False, elem_id="status_display")
            #gr.Markdown(description2)
        with gr.Row():
            with gr.Column(scale=5):
                with gr.Row():
                    chatbot_bild = gr.Chatbot(elem_id="li-zeichnen", show_copy_button=True, show_share_button=True)
                with gr.Row():
                    with gr.Column(scale=12):
                        user_input2 = gr.Textbox(
                            show_label=False, placeholder="Gib hier deinen Prompt ein...",
                            container=False
                        )
                    with gr.Column(min_width=70, scale=1):
                        submitBtn2 = gr.Button("Senden")
                    #with gr.Column(min_width=70, scale=1):
                        #cancelBtn2 = gr.Button("Stop")
                with gr.Row():
                    # Fixed: this button used to clear `user_input`, the
                    # textbox of the chatbot tab, instead of this tab's own
                    # textbox.
                    emptyBtn2 = gr.ClearButton([user_input2, chatbot_bild], value="🧹 Neue Session", scale=10)
                #additional_inputs_accordion = gr.Accordion(label="Weitere Eingaben...", open=False)
            with gr.Column():
                with gr.Column(min_width=50, scale=1):
                    with gr.Tab(label="Parameter Einstellung"):
                        #gr.Markdown("# Parameters")
                        model_option_zeichnen = gr.Radio(["Stable Diffusion", "DallE"], label="Modellauswahl", value="Stable Diffusion")

    gr.Markdown(description)

    ######################################
    # Events and the values handed to the handler functions
    ######################################
    ######################################
    # Tab 1: chatbot
    # Arguments of the generate function
    predict_args = dict(
        fn=generate_auswahl,
        inputs=[
            user_question,
            attached_file,
            chatbot,
            history,
            rag_option,
            model_option,
            openai_key,
            anzahl_docs,
            top_p,
            temperature,
            max_length_tokens,
            max_context_length_tokens,
            repetition_penalty
        ],
        outputs=[chatbot, history, attached_file, status_display], #[chatbot, history, status_display]
        show_progress=True,
    )

    reset_args = dict(
        fn=reset_textbox, inputs=[], outputs=[user_input, status_display]
    )

    # First step of every request: move the prompt from the textbox into
    # the chat display and into the user_question state.
    transfer_input_args = dict(
        fn=add_text, inputs=[chatbot, history, user_input, attached_file], outputs=[chatbot, history, user_question, attached_file, image_display, user_input], show_progress=True
    )

    predict_event1 = user_input.submit(**transfer_input_args, queue=False,).then(**predict_args)
    predict_event2 = submitBtn.click(**transfer_input_args, queue=False,).then(**predict_args)
    predict_event3 = upload.upload(file_anzeigen, [upload], [image_display, image_display, attached_file]) #.then(**predict_args)
    emptyBtn.click(clear_all, [], [attached_file, image_display, history])
    # Clicking the thumbnail removes the attached file again.
    image_display.select(file_loeschen, [], [attached_file, image_display])

    # Stop the computation or the output (can be continued afterwards).
    cancelBtn.click(cancel_outputing, [], [status_display], cancels=[predict_event1, predict_event2, predict_event3])

    ######################################
    # Tab 2: drawing
    predict_args2 = dict(
        fn=generate_bild,
        inputs=[
            user_question2,
            chatbot_bild,
            model_option_zeichnen,
            #additional_inputs,
        ],
        outputs=[chatbot_bild, status_display2], #[chatbot, history, status_display]
        show_progress=True,
    )
    transfer_input_args2 = dict(
        fn=add_text2, inputs=[chatbot_bild, user_input2], outputs=[chatbot_bild, user_question2, user_input2], show_progress=True
    )
    predict_event2_1 = user_input2.submit(**transfer_input_args2, queue=False,).then(**predict_args2)
    predict_event2_2 = submitBtn2.click(**transfer_input_args2, queue=False,).then(**predict_args2)
    #emptyBtn2.click(clear_all, [], [file_display, image_display])

    #cancelBtn2.click(
        #cancels=[predict_event2_1,predict_event2_2 ]
    #)


demo.title = "LI-ChatBot"
demo.queue().launch(debug=True)
|
|
|
|
|
|
|
|
|
|
|
|