AFischer1985 committed on
Commit 8691fee · verified · 1 Parent(s): 8d8b439

Update run.py

Files changed (1)
  1. run.py +292 -74
run.py CHANGED
@@ -1,19 +1,38 @@
- #############################################################################################################
- # Title: Gradio Interface to LLM-chatbot (for recommending AI) with RAG-functionality and ChromaDB on HF-Hub
  # Author: Andreas Fischer
- # Date: December 30th, 2023
- # Last update: January 2nd, 2024
- ##############################################################################################################
 
 
  # Chroma-DB
  #-----------
  import os
  import chromadb
- dbPath="/home/af/Schreibtisch/gradio/Chroma/db"
- if(os.path.exists(dbPath)==False):
-     dbPath="/home/user/app/db"
  print(dbPath)
  #client = chromadb.Client()
  path=dbPath
  client = chromadb.PersistentClient(path=path)
@@ -22,69 +41,213 @@ print(client.get_version())
  print(client.list_collections())
  from chromadb.utils import embedding_functions
  default_ef = embedding_functions.DefaultEmbeddingFunction()
- sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")
  #instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")
  print(str(client.list_collections()))
 
  global collection
- if("name=ChromaDB1" in str(client.list_collections())):
-     print("ChromaDB1 found!")
-     collection = client.get_collection(name="ChromaDB1", embedding_function=sentence_transformer_ef)
  else:
-     print("ChromaDB1 created!")
      collection = client.create_collection(
-         "ChromaDB1",
-         embedding_function=sentence_transformer_ef,
          metadata={"hnsw:space": "cosine"})
-
  collection.add(
-     documents=[
-         "Text generating AI model mistralai/Mixtral-8x7B-Instruct-v0.1: Suitable for text generation, e.g., social media content, marketing copy, blog posts, short stories, etc.",
-         "Image generating AI model stabilityai/sdxl-turbo: Suitable for image generation, e.g., illustrations, graphics, AI art, etc.",
-         "Audio transcribing AI model openai/whisper-large-v3: Suitable for audio-transcription in different languages",
-         "Speech synthesizing AI model coqui/XTTS-v2: Suitable for generating audio from text and for voice-cloning",
-         "Code generating AI model deepseek-ai/deepseek-coder-6.7b-instruct: Suitable for programming in Python, JavaScript, PHP, Bash and many other programming languages.",
-         "Translation AI model Helsinki-NLP/opus-mt: Suitable for translating text, e.g., from English to German or vice versa",
-         "Search result-integrating AI model phind/phind-v9-model: Suitable for researching current topics and for obtaining precise and up-to-date answers to questions based on web search results"
      ],
-     metadatas=[{"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}, {"source": "AF"}],
-     ids=["ai1", "ai2", "ai3", "ai4", "ai5", "ai6", "ai7"],
  )
 
- print("Database ready!")
- print(collection.count())
 
 
  # Model
  #-------
 
- from huggingface_hub import InferenceClient
- import gradio as gr
-
- client = InferenceClient(
-     "mistralai/Mixtral-8x7B-Instruct-v0.1"
      #"mistralai/Mistral-7B-Instruct-v0.1"
- )
 
 
  # Gradio-GUI
  #------------
 
  import gradio as gr
  import json
 
- def format_prompt(message, history):
-     prompt = "<s>"
-     #for user_prompt, bot_response in history:
-     #    prompt += f"[INST] {user_prompt} [/INST]"
-     #    prompt += f" {bot_response}</s> "
-     prompt += f"[INST] {message} [/INST]"
-     return prompt
-
- def response(
-     prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
- ):
-     temperature = float(temperature)
      if temperature < 1e-2: temperature = 1e-2
      top_p = float(top_p)
      generate_kwargs = dict(
@@ -95,31 +258,86 @@ def response(
          do_sample=True,
          seed=42,
      )
-     addon=""
-     results=collection.query(
-         query_texts=[prompt],
-         n_results=2,
-         #where={"source": "google-docs"}
-         #where_document={"$contains":"search_string"}
-     )
-     dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in results['distances'][0]]
-     sources=["source: "+s["source"]+")</small>" for s in results['metadatas'][0]]
-     results=results['documents'][0]
-     combination = zip(results,dists,sources)
-     combination = [' '.join(triplets) for triplets in combination]
-     print(combination)
-     if(len(results)>1):
-         addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
-     system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt."+addon+"\n\nUser-Anliegen:"
-     #body={"prompt":system+"### Instruktion:\n"+message+"\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
-     formatted_prompt = format_prompt(system+"\n"+prompt, history)
-     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-     output = ""
-     for response in stream:
-         output += response.token.text
-         yield output
-     output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
-     yield output
-
- gr.ChatInterface(response, chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten geeigneten KI-Tools empfiehlt.<br>Aktuell bin ich wenig mehr als eine Tech-Demo und kenne nur 7 KI-Modelle - also sei bitte nicht zu streng mit mir.<br>Was ist dein Anliegen?"]],render_markdown=True),title="German AI-RAG-Interface to the Hugging Face Hub").queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
- print("Interface up and running!")
 
+ #########################################################################################
+ # Title: Gradio Interface to LLM-chatbot with memory RAG on premises
  # Author: Andreas Fischer
+ # Date: October 15th, 2023
+ # Last update: February 22nd, 2024
+ ##########################################################################################
 
+ #https://github.com/abetlen/llama-cpp-python/issues/306
+ #sudo apt install libclblast-dev
+ #CMAKE_ARGS="-DLLAMA_CLBLAST=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir -v
+
+ # Prepare resources
+ #-------------------
+ import torch
+ import gc
+ torch.cuda.empty_cache()
+ gc.collect()
+
+ import os
+ from datetime import datetime
+ global filename
+ filename=f"./{datetime.now().strftime('%Y%m%d')}_history.json" # where to store the history as json-file
+ if(os.path.exists(filename)==True): os.remove(filename)
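+ # The chat history is persisted to a per-day JSON file; deleting it here means every run of the
+ # script starts a fresh session for the current date (it is only rewritten when "Permanent" is selected).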
 
  # Chroma-DB
  #-----------
  import os
  import chromadb
+ dbPath = "/home/af/Schreibtisch/Code/gradio/Chroma/db"
+ onPrem = True if(os.path.exists(dbPath)) else False
+ if(onPrem==False): dbPath="/home/user/app/db"
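+ # The desktop path only exists on the development machine; if it is missing, the script assumes it
+ # is running as a Hugging Face Space and falls back to the Space's db path.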
+
+ #onPrem=False # override automatic detection
  print(dbPath)
+
  #client = chromadb.Client()
  path=dbPath
  client = chromadb.PersistentClient(path=path)
 
  print(client.list_collections())
  from chromadb.utils import embedding_functions
  default_ef = embedding_functions.DefaultEmbeddingFunction()
+ #sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")
  #instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")
+
+ embeddingModel = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer", device="cuda" if(onPrem) else "cpu")
  print(str(client.list_collections()))
 
  global collection
+ dbName="historicalChromaDB1"
+
+ if("name="+dbName in str(client.list_collections())): client.delete_collection(name=dbName) # deletes collection
+
+ if("name="+dbName in str(client.list_collections())):
+     print(dbName+" found!")
+     collection = client.get_collection(name=dbName, embedding_function=embeddingModel) #sentence_transformer_ef)
  else:
+     #client.delete_collection(name=dbName)
+     print(dbName+" created!")
      collection = client.create_collection(
+         dbName,
+         embedding_function=embeddingModel,
          metadata={"hnsw:space": "cosine"})
+
+ print("Database ready!")
+ print(collection.count())
+
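+ # Note: because the collection is deleted just above, the "found" branch is effectively never taken
+ # and the database is rebuilt empty on every start.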
+ x=collection.get(include=[])["ids"]
+ if(len(x)==0):
+     message="Ich bin der User."
+     response="Hallo User, wie kann ich dienen?"
+ x=collection.get(include=[])["ids"]
  collection.add(
+     documents=[message,response],
+     metadatas=[
+         {"source": "ICH", "dialog": f"ICH: {message}\nDU: {response}"},
+         {"source": "DU", "dialog": f"ICH: {message}\nDU: {response}"}
      ],
+     ids=[str(len(x)+1),str(len(x)+2)]
+ )
+ RAGResults=collection.query(
+     query_texts=[message],
+     n_results=1,
+     #where={"source": "USER"}
  )
+ RAGResults["metadatas"][0][0]["dialog"]
+
+ collection.get(include=["documents"]) # inspect stored documents (ids are always returned)
+ x=collection.get(include=[])["ids"]
+ x
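+ # The collection acts as long-term memory: each user and assistant turn is stored as its own document,
+ # with the full exchange kept in the metadata "dialog" field. The block above seeds it with one example
+ # exchange so that queries on the freshly created database return a result.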
 
 
  # Model
  #-------
+ #onPrem=False
 
+ if(onPrem==False):
+     modelPath="mistralai/Mixtral-8x7B-Instruct-v0.1"
+     from huggingface_hub import InferenceClient
+     import gradio as gr
+     client = InferenceClient(
+         modelPath
+         #"mistralai/Mixtral-8x7B-Instruct-v0.1"
          #"mistralai/Mistral-7B-Instruct-v0.1"
+     )
+ else:
+     import os
+     import requests
+     import subprocess
+     ##modelPath="/home/af/gguf/models/phi-2.Q4_0.gguf"
+     #modelPath="/home/af/gguf/models/openchat-3.5-0106.Q4_0.gguf"
+     #modelPath="/home/af/gguf/models/decilm-7b-uniform-gqa-q8_0.gguf"
+     #modelPath="/home/af/gguf/models/wizardlm-13b-v1.2.Q4_0.gguf"
+     #modelPath="/home/af/gguf/models/SauerkrautLM-7b-HerO-q8_0.gguf"
+     #modelPath="/home/af/gguf/models/gemma-2b-it-Q4_0.gguf"
+     modelPath="/home/af/gguf/models/discolm_german_7b_v1.Q4_0.gguf"
+     modelPath="/home/af/gguf/models/gemma-7b-it-Q4_K_M.gguf"
+     modelPath="/home/af/gguf/models/gemma-7b-it-Q4_0.gguf"
+     #modelPath="/home/af/gguf/models/sauerkrautlm-una-solar-instruct.Q4_0.gguf"
+     #modelPath="/home/af/gguf/models/mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
+     #modelPath="/home/af/gguf/models/dolphin-2.5-mixtral-8x7b.Q4_0.gguf"
+     #modelPath="/home/af/gguf/models/nous-hermes-2-mixtral-8x7b-dpo.Q4_0.gguf"
+     if(os.path.exists(modelPath)==False):
+         #url="https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
+         #url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
+         #url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
+         url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
+         response = requests.get(url)
+         with open("./model.gguf", mode="wb") as file:
+             file.write(response.content)
+         print("Model downloaded")
+         modelPath="./model.gguf"
+     print(modelPath)
+     n="20"
+     if("mixtral-8x7b-instruct" in modelPath): n="0" # mixtral seems to cause problems here...
+     command = ["python3", "-m", "llama_cpp.server", "--model", modelPath, "--host", "0.0.0.0", "--port", "2600", "--n_threads", "8", "--n_gpu_layers", n]
+     subprocess.Popen(command)
+     print("Server ready!")
 
 
+ #import llama_cpp
+ #llama_cpp.llama_backend_init(numa=False)
+ #params=llama_cpp.llama_context_default_params()
+ #params.n_ctx
+
  # Gradio-GUI
  #------------
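+ # extend_prompt() assembles the full prompt from per-model chat templates: a system turn, the last
+ # `historylimit` user/assistant exchanges from the history, and the current message; `RAGAddon` is
+ # appended to the system prompt and `system2` can append fictive first words for the assistant.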
 
+ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4): #float("Inf")
+     if zeichenlimit is None: zeichenlimit=1000000000 # :-)
+     template0="[INST] {system} [/INST]</s>" if onPrem else "[INST] {system} [/INST]</s>" #<s>?
+     template1="[INST] {message} [/INST] "
+     template2="{response}</s>"
+     if("discolm_german_7b" in modelPath): #https://huggingface.co/DiscoResearch/DiscoLM_German_7b_v1
+         template0="<|im_start|>system\n{system}<|im_end|>\n"
+         template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+         template2="{response}<|im_end|>\n"
+     if("mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+         template0="[INST] {system} [/INST]</s>" if onPrem else "[INST] {system} [/INST]</s>" #<s>?
+         template1="[INST] {message} [/INST] "
+         template2="{response}</s>"
+     if("gemma-" in modelPath): # https://huggingface.co/google/gemma-7b-it
+         template0="<start_of_turn>user{system}</end_of_turn>"
+         template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
+         template2="{response}</end_of_turn>"
+     if("Mistral-7B-Instruct" in modelPath): #https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
+         template0="[INST] {system} [/INST]</s>" if onPrem else "[INST] {system} [/INST]</s>" #<s>?
+         template1="[INST] {message} [/INST] "
+         template2="{response}</s>"
+     if("openchat-3.5" in modelPath): #https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF
+         template0="GPT4 Correct User: {system}<|end_of_turn|>GPT4 Correct Assistant: Okay.<|end_of_turn|>"
+         template1="GPT4 Correct User: {message}<|end_of_turn|>GPT4 Correct Assistant: "
+         template2="{response}<|end_of_turn|>"
+     if("SauerkrautLM-7b-HerO" in modelPath): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
+         template0="<|im_start|>system\n{system}<|im_end|>\n"
+         template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+         template2="{response}<|im_end|>\n"
+     if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
+         template0="{system} " #<s>
+         template1="USER: {message} ASSISTANT: "
+         template2="{response}</s>"
+     if("phi-2" in modelPath): #https://huggingface.co/TheBloke/phi-2-GGUF
+         template0="Instruct: {system}\nOutput: Okay.\n"
+         template1="Instruct: {message}\nOutput:"
+         template2="{response}\n"
+     prompt = ""
+     if RAGAddon is not None:
+         system += RAGAddon
+     if system is not None:
+         prompt += template0.format(system=system) #"<s>"
+     if history is not None:
+         for user_message, bot_response in history[-historylimit:]:
+             if user_message is not None: prompt += template1.format(message=user_message[:zeichenlimit]) #"[INST] {user_prompt} [/INST] "
+             if bot_response is not None: prompt += template2.format(response=bot_response[:zeichenlimit]) #"{bot_response}</s> "
+     if message is not None: prompt += template1.format(message=message[:zeichenlimit]) #"[INST] {message} [/INST]"
+     if system2 is not None:
+         prompt += system2
+     return prompt
+
  import gradio as gr
+ import requests
  import json
+ from datetime import datetime
+ import os
+ import re
 
+ def response(message, history,customSysPrompt,settings):
+     #print(str(history)) # print history
+     #system="Du bist ein KI-basierter Assistent."
+     system="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter und sprichst Deutsch." if customSysPrompt is None else customSysPrompt
+     message=message.replace("[INST]","")
+     message=message.replace("[/INST]","")
+     message=re.sub("<[|](im_start|im_end|end_of_turn)[|]>", '', message)
+     if (settings=="Permanent"):
+         if((len(history)==0)&(os.path.isfile(filename))): history=json.load(open(filename,'r',encoding="utf-8")) # retrieve history (if available)
+     x=collection.get(include=[])["ids"]
+     rag=None # RAG is turned off until history gets too long
+     historylimit=4
+     if(len(x)>(historylimit*2)): # turn on RAG when the database contains entries that are not shown within historylimit
+         RAGResults=collection.query(
+             query_texts=[message],
+             n_results=1,
+             #where={"source": "USER"}
+         )
+         bestMatch=str(RAGResults["metadatas"][0][0]["dialog"])
+         #print("Message: "+message+"\n\nBest Match: "+bestMatch)
+         rag="\n\n"
+         rag += "Mit Blick auf den aktuellen Stand der Session erinnerst du dich insb. an folgende Episode:\n"
+         rag += bestMatch
+         rag += "\n\nIm Folgenden siehst du den aktuellen Stand der Session."
+         rag += "Bitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
+     else:
+         system += "\nBitte beschreibe kurz den weiteren Verlauf bis zur nächsten Handlung des Spielers!"
+     system2=None # system2 can be used as fictive first words of the AI, which are not displayed or stored
+     #print("RAG: "+rag)
+     #print("System: "+system+"\n\nMessage: "+message)
+     prompt=extend_prompt(message,history,system,rag,system2,historylimit=historylimit)
+     print("\n\n*** Prompt:\n"+prompt+"\n***\n\n")
+
+     ## Request response from model
+     #------------------------------
+
+     print("AI running on prem!" if(onPrem) else "AI running HFHub!")
+     if(onPrem==False):
+         temperature=float(0.9)
+         max_new_tokens=500
+         top_p=0.95
+         repetition_penalty=1.0
          if temperature < 1e-2: temperature = 1e-2
          top_p = float(top_p)
          generate_kwargs = dict(
 
              do_sample=True,
              seed=42,
          )
+         stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+         response = ""
+         #print("User: "+message+"\nAI: ")
+         for text in stream:
+             part=text.token.text
+             #print(part, end="", flush=True)
+             response += part
+             yield response
+         history.append((message, response)) # add current dialog to history
+         # Store current state in DB if settings=="Permanent"
+         if (settings=="Permanent"):
+             x=collection.get(include=[])["ids"] # add current dialog to db
+             collection.add(
+                 documents=[message,response],
+                 metadatas=[
+                     { "source": "ICH", "dialog": f"ICH: {message.strip()}\n DU: {response.strip()}", "type":"episode"},
+                     { "source": "DU", "dialog": f"ICH: {message.strip()}\n DU: {response.strip()}", "type":"episode"}
+                 ],
+                 ids=[str(len(x)+1),str(len(x)+2)]
+             )
+             json.dump(history,open(filename,'w',encoding="utf-8"),ensure_ascii=False)
+
+     if(onPrem==True):
+         # url="https://afischer1985-wizardlm-13b-v1-2-q4-0-gguf.hf.space/v1/completions"
+         url="http://0.0.0.0:2600/v1/completions"
+         body={"prompt":prompt,"max_tokens":None, "echo":"False","stream":"True"} # e.g. Mixtral-Instruct
+         if("discolm_german_7b" in modelPath): body.update({"stop": ["<|im_end|>"]}) # fix stop-token of DiscoLM
+         if("gemma-" in modelPath): body.update({"stop": ["<|im_end|>","</end_of_turn>"]}) # fix stop-token of Gemma
+         response="" #+"("+myType+")\n"
+         buffer=""
+         #print("URL: "+url)
+         #print("User: "+message+"\nAI: ")
+         for text in requests.post(url, json=body, stream=True): #-H 'accept: application/json' -H 'Content-Type: application/json'
+             if buffer is None: buffer=""
+             buffer=str("".join(buffer))
+             # print("*** Raw String: "+str(text)+"\n***\n")
+             text=text.decode('utf-8')
+             if((text.startswith(": ping -")==False) & (len(text.strip("\n\r"))>0)): buffer=buffer+str(text)
+             # print("\n*** Buffer: "+str(buffer)+"\n***\n")
+             buffer=buffer.split('"finish_reason": null}]}')
+             if(len(buffer)==1):
+                 buffer="".join(buffer)
+                 pass
+             if(len(buffer)==2):
+                 part=buffer[0]+'"finish_reason": null}]}'
+                 if(part.lstrip('\n\r').startswith("data: ")): part=part.lstrip('\n\r').replace("data: ", "")
+                 try:
+                     part = str(json.loads(part)["choices"][0]["text"])
+                     #print(part, end="", flush=True)
+                     response=response+part
+                     buffer="" # reset buffer
+                 except Exception as e:
+                     print("Exception:"+str(e))
+                     pass
+             yield response
+         history.append((message, response)) # add current dialog to history
+         # Store current state in DB if settings=="Permanent"
+         if (settings=="Permanent"):
+             x=collection.get(include=[])["ids"] # add current dialog to db
+             collection.add(
+                 documents=[message,response],
+                 metadatas=[
+                     { "source": "ICH", "dialog": f"ICH: {message.strip()}\n DU: {response.strip()}", "type":"episode"},
+                     { "source": "DU", "dialog": f"ICH: {message.strip()}\n DU: {response.strip()}", "type":"episode"}
+                 ],
+                 ids=[str(len(x)+1),str(len(x)+2)]
+             )
+             json.dump(history,open(filename,'w',encoding="utf-8"),ensure_ascii=False)
+
+ gr.ChatInterface(
+     response,
+     chatbot=gr.Chatbot(render_markdown=True),
+     title="AI-Interface (on prem)" if onPrem else "AI-Interface (HFHub)",
+     additional_inputs=[
+         gr.Textbox(value="Lass uns ein Rollenspiel spielen. Wir spielen Shadowrun. Du bist der Spielleiter und sprichst Deutsch.",label="System Prompt"),
+         gr.Dropdown(["Permanent","Temporär"],value="Temporär",label="Dialog speichern?")
+     ]
+ ).queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
+ print("Interface up and running!")
+
+
+
+