wop committed on
Commit 7165422 · verified · 1 Parent(s): b5fc4fe

Update app.py

Files changed (1)
  1. app.py +51 -60
app.py CHANGED
@@ -1,73 +1,64 @@
- import json
- import gradio as gr
- import random
  from huggingface_hub import InferenceClient

- API_URL = "https://api-inference.huggingface.co/models/"

- client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")

  def format_prompt(message, history):
-     prompt = "You're a helpful assistant."
      for user_prompt, bot_response in history:
-         prompt += f" [INST] {user_prompt} [/INST] {bot_response}</s> "
-     prompt += f" [INST] {message} [/INST]"
      return prompt

- def generate(prompt, history, temperature=0.9, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0):
-     temperature = float(temperature) if temperature > 0 else 0.01
      top_p = float(top_p)

-     generate_kwargs = dict(
-         temperature=temperature,
-         max_new_tokens=max_new_tokens,
-         top_p=top_p,
-         repetition_penalty=repetition_penalty,
-         do_sample=True,
-         seed=random.randint(0, 10**7),
-     )
-
      formatted_prompt = format_prompt(prompt, history)
-
-     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-     output = ""
-
-     for response in stream:
-         output += response.token.text
-         yield output
-
- def load_database():
-     try:
-         with open("database.json", "r", encoding="utf-8") as f:
-             data = json.load(f)
-         if not isinstance(data, list):
-             raise ValueError("Invalid data format")
-         return data
-     except (FileNotFoundError, json.JSONDecodeError, ValueError):
-         print("Error loading database: File not found, invalid format, or empty. Creating an empty database.")
-         return []
-
-
- def save_database(data):
-     try:
-         with open("database.json", "w", encoding="utf-8") as f:
-             json.dump(data, f, indent=4)
-     except (IOError, json.JSONEncodeError):
-         print("Error saving database: Encountered an issue while saving.")
-
- def chat_interface(message):
-     database = load_database()
-
-     if (message, None) not in database:
-         response = next(generate(message, history=[]))
-         database.append((message, response))
-         save_database(database)
      else:
-         _, stored_response = next(item for item in database if item[0] == message)
-         response = stored_response
-
-     return response
-
- with gr.Interface(fn=chat_interface, inputs="textbox", outputs="textbox", title="Chat Interface") as iface:
-     iface.launch()

  from huggingface_hub import InferenceClient
+ import gradio as gr
+ import json
+
+ client = InferenceClient(
+     "mistralai/Mistral-7B-Instruct-v0.1"
+ )
+
+ DATABASE_PATH = "database.json"

+ def load_database():
+     try:
+         with open(DATABASE_PATH, "r") as file:
+             return json.load(file)
+     except FileNotFoundError:
+         return {}

+ def save_database(database):
+     with open(DATABASE_PATH, "w") as file:
+         json.dump(database, file)

  def format_prompt(message, history):
+     prompt = "<s>"
      for user_prompt, bot_response in history:
+         prompt += f"[INST] {user_prompt} [/INST]"
+         prompt += f" {bot_response}</s> "
+     prompt += f"[INST] {message} [/INST]"
      return prompt

+ def generate(
+     prompt, history, temperature=0.9, max_new_tokens=2000, top_p=0.9, repetition_penalty=1.2,
+ ):
+     database = load_database() # Load the database
+     temperature = float(temperature)
+     if temperature < 1e-2:
+         temperature = 1e-2
      top_p = float(top_p)

      formatted_prompt = format_prompt(prompt, history)
+     if formatted_prompt in database:
+         response = database[formatted_prompt]
      else:
+         response = client.text_generation(formatted_prompt, details=True, return_full_text=False)
+         response_text = response.generated_tokens[0].text
+         database[formatted_prompt] = response_text
+         save_database(database) # Save the updated database
+
+     yield response_text
+
+ css = """
+ #mkd {
+     height: 500px;
+     overflow: auto;
+     border: 1px solid #ccc;
+ }
+ """
+
+ with gr.Blocks(css=css) as demo:
+     gr.ChatInterface(
+         generate,
+         examples=[["What is the secret to life?"], ["Write me a recipe for pancakes."], ["Write a short story about Paris."]]
+     )

+ demo.launch(debug=True)
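
For reference, the updated format_prompt wraps each turn in Mistral's [INST] ... [/INST] tags, and the cache in database.json is keyed on that full string. Below is a minimal standalone sketch of what the prompt looks like for a one-turn history; the function body mirrors the committed code, while the sample history and message are made up for illustration.

# Sketch: reproduce the prompt string the updated app builds and caches on.
def format_prompt(message, history):
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt

# Illustrative inputs only.
history = [("Hi there", "Hello! How can I help?")]
print(format_prompt("Tell me a joke.", history))
# <s>[INST] Hi there [/INST] Hello! How can I help?</s> [INST] Tell me a joke. [/INST]

Because the cache key is this exact formatted string, repeating a question with an identical history is served from database.json instead of calling the Inference API again.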