Hjgugugjhuhjggg committed on
Commit 1f0a3a2 · verified · 1 Parent(s): 8806695

Update app.py

Files changed (1)
  1. app.py +21 -46
app.py CHANGED
@@ -14,28 +14,24 @@ from pydantic import BaseModel
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
-global_data = {'models': {}, 'tokens': {'eos': 'eos_token', 'pad': 'pad_token', 'padding': 'padding_token', 'unk': 'unk_token', 'bos': 'bos_token', 'sep': 'sep_token', 'cls': 'cls_token', 'mask': 'mask_token'}}
-
+global_data = {'models': {}, 'tokens': {k: k + '_token' for k in ['eos', 'pad', 'padding', 'unk', 'bos', 'sep', 'cls', 'mask']}}
 model_configs = [{"repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF", "filename": "mergekit-ties-tzamfyy-q2_k.gguf", "name": "my_model"}]
-
 models = {}
 
 def load_model(model_config):
     model_name = model_config['name']
-    if model_name not in models:
-        try:
-            model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
-            models[model_name] = model
-            global_data['models'] = models
-            return model
-        except Exception as e:
-            print(f"Error loading model {model_name}: {e}")
-            return None
+    try:
+        model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
+        models[model_name] = model
+        global_data['models'] = models
+        return model
+    except Exception as e:
+        print(f"Error loading model {model_name}: {e}")
+        return None
 
 for config in model_configs:
     model = load_model(config)
     if model is None:
-        print(f"Failed to load model {config['name']}. Exiting.")
         exit(1)
 
 class ChatRequest(BaseModel):
@@ -45,21 +41,14 @@ def normalize_input(input_text):
     return input_text.strip()
 
 def remove_duplicates(text):
-    lines = text.split('\n')
-    unique_lines = []
-    seen_lines = set()
-    for line in lines:
-        line = line.strip()
-        if line and line not in seen_lines:
-            unique_lines.append(line)
-            seen_lines.add(line)
-    return '\n'.join(unique_lines)
+    lines = [line.strip() for line in text.split('\n') if line.strip()]
+    return '\n'.join(dict.fromkeys(lines))
 
 def generate_model_response(model, inputs):
     try:
         if model is None:
             return "Model loading failed."
-        response = model(inputs, max_tokens=-1)
+        response = model(inputs, max_tokens=1000) #Reduced chunk size for safety
         return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
         print(f"Error generating response: {e}")
@@ -68,53 +57,39 @@ def generate_model_response(model, inputs):
 app = FastAPI()
 origins = ["*"]
 app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
+    CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
 )
 
 @app.post("/generate")
 async def generate(request: ChatRequest):
     inputs = normalize_input(request.message)
-    chunk_size = 500
+    chunk_size = 400 # Reduced chunk size further for this model
     chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), chunk_size)]
     overall_response = ""
+
     for chunk in chunks:
         with ThreadPoolExecutor() as executor:
            futures = [executor.submit(generate_model_response, model, chunk) for model in models.values()]
-            responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(models.keys(), as_completed(futures))]
-
-            unique_responses = {}
-            for response in responses:
-                if response['model'] not in unique_responses and response['response']:
-                    unique_responses[response['model']] = response['response']
-
-            chunk_response = ""
-            for model, response in unique_responses.items():
-                chunk_response += f"**{model}:**\n{response}\n\n"
-            overall_response += chunk_response
-
+            responses = [{'model': name, 'response': future.result()} for name, future in zip(models, as_completed(futures))]
+            for response in responses: #Simplified response processing
+                overall_response += f"**{response['model']}:**\n{response['response']}\n\n"
     return {"response": overall_response}
 
 async def process_message(message, history):
     try:
-        port = int(os.environ.get("PORT", 7860))
+        port = os.environ.get("PORT", 7860)
         response = requests.post(f"http://localhost:{port}/generate", json={"message": message}).json()
         formatted_response = response["response"]
         history.append((message, formatted_response))
         return history, history
     except requests.exceptions.RequestException as e:
-        return history, f"Error communicating with the backend: {e}"
-
+        return history, f"Error: {e}"
 
 iface = gr.Interface(
     fn=process_message,
     inputs=[gr.Textbox(lines=2, placeholder="Enter your message here..."), gr.State([])],
     outputs=[gr.Chatbot(), gr.State([])],
-    title="Multi-Model LLM API",
-    description="Enter a message and get responses from multiple LLMs.",
+    title="Multi-Model LLM API", description="Enter a message and get responses from multiple LLMs."
 )
 
  if __name__ == "__main__":
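
For a quick check of the updated /generate route, a minimal client sketch follows. It assumes the app is running locally on the default port 7860 (the PORT fallback used in process_message), that ChatRequest exposes the message field used by the endpoint, and that the example prompt and timeout are hypothetical choices:

import requests

# Hypothetical smoke test for the /generate route above.
# Assumes the server is reachable at http://localhost:7860.
payload = {"message": "Summarize the benefits of quantized GGUF models."}
resp = requests.post("http://localhost:7860/generate", json=payload, timeout=300)
resp.raise_for_status()

# The endpoint returns {"response": "..."} with one "**model_name:**" block per loaded model.
print(resp.json()["response"])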