Hjgugugjhuhjggg committed on
Commit d0e7d36 · verified · 1 Parent(s): 646b5b6

Create app.py

Files changed (1)
  1. app.py +130 -0
app.py ADDED
@@ -0,0 +1,130 @@
+from pydantic import BaseModel
+from llama_cpp import Llama
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import gradio as gr
+import os
+from dotenv import load_dotenv
+
+# Expects a .env file (or environment variable) providing HUGGINGFACE_TOKEN.
+load_dotenv()
+
+HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
+
+global_data = {
+    'models': {},
+    'tokens': {
+        'eos': 'eos_token',
+        'pad': 'pad_token',
+        'padding': 'padding_token',
+        'unk': 'unk_token',
+        'bos': 'bos_token',
+        'sep': 'sep_token',
+        'cls': 'cls_token',
+        'mask': 'mask_token'
+    }
+}
+
+model_configs = [
+    {"repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF", "filename": "mergekit-ties-tzamfyy-q2_k.gguf", "name": "my_model"}
+    # Add more models here as needed; see the example entry below. Ensure the repo_id and filename are correct.
+]
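+
+# A second entry might look like this (hypothetical repo_id/filename, shown for
+# illustration only; they must point at a real GGUF file on the Hub):
+# {"repo_id": "TheBloke/Llama-2-7B-GGUF", "filename": "llama-2-7b.Q4_K_M.gguf", "name": "llama2_7b"}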
+
+class ModelManager:
+    def __init__(self):
+        self.models = {}
+
+    def load_model(self, model_config):
+        # Download and initialize a GGUF model from the Hugging Face Hub.
+        if model_config['name'] not in self.models:
+            try:
+                model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
+                self.models[model_config['name']] = model
+                print(f"Model '{model_config['name']}' loaded successfully.")
+            except Exception as e:
+                print(f"Error loading model {model_config['name']}: {e}")
+                self.models[model_config['name']] = None  # Indicate loading failure
+
+    def load_all_models(self):
+        # Load all configured models in parallel threads.
+        with ThreadPoolExecutor() as executor:
+            futures = [executor.submit(self.load_model, config) for config in model_configs]
+            for future in as_completed(futures):
+                future.result()  # Propagate any exceptions raised during loading
+        return self.models
+
+model_manager = ModelManager()
+global_data['models'] = model_manager.load_all_models()
+
+class ChatRequest(BaseModel):
+    # Currently unused; kept as a request schema for a possible REST endpoint.
+    message: str
+
+def normalize_input(input_text):
+    return input_text.strip()
+
+def remove_duplicates(text):
+    # Drop repeated lines while preserving first-seen order.
+    lines = text.split('\n')
+    unique_lines = []
+    seen_lines = set()
+    for line in lines:
+        line = line.strip()  # Remove extra whitespace before comparing
+        if line and line not in seen_lines:
+            unique_lines.append(line)
+            seen_lines.add(line)
+    return '\n'.join(unique_lines)
+
+def generate_model_response(model, inputs):
+    try:
+        if model is None:  # Handle failed model loading
+            return ""
+        response = model(inputs)
+        return remove_duplicates(response['choices'][0]['text'])
+    except Exception as e:
+        print(f"Error generating model response: {e}")
+        return f"Error: {e}"  # Return an informative error message
+
+def remove_repetitive_responses(responses):
+    unique_responses = {}
+    for response in responses:
+        if response['model'] not in unique_responses and response['response']:  # Skip empty responses
+            unique_responses[response['model']] = response['response']
+    return unique_responses
+
+async def process_message(message, history):
+    inputs = normalize_input(message)
+    with ThreadPoolExecutor() as executor:
+        # Keep each future paired with its model name; iterating as_completed
+        # alongside .keys() would misattribute responses, since as_completed
+        # yields futures in completion order, not submission order.
+        futures = {
+            model_name: executor.submit(generate_model_response, model, inputs)
+            for model_name, model in global_data['models'].items()
+        }
+        responses = [{'model': model_name, 'response': future.result()} for model_name, future in futures.items()]
+
+    unique_responses = remove_repetitive_responses(responses)
+    formatted_response = ""
+    for model, response in unique_responses.items():
+        formatted_response += f"**{model}:**\n{response}\n\n"
+
+    history.append((message, formatted_response))
+    return history, ""
+
+iface = gr.Interface(
+    fn=process_message,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter your message here..."),
+        gr.State([])
+    ],
+    outputs=[
+        gr.Chatbot(),
+        gr.Textbox(label="cURL command", visible=False)  # Hidden cURL command output
+    ],
+    title="Multi-Model LLM API",
+    description="Enter a message and get responses from multiple LLMs.",
+)
+
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 7860))
+    iface.launch(server_port=port)
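
Once the app is running, it can be queried programmatically. A minimal sketch using gradio_client (the URL and the "/predict" endpoint name are assumptions; check the running app's "Use via API" page for the exact signature):

from gradio_client import Client

# Assumed local URL; replace with the Space URL once deployed.
client = Client("http://127.0.0.1:7860")
# "/predict" is the default endpoint name for a single gr.Interface (assumed here).
result = client.predict("Hello, models!", api_name="/predict")
print(result)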