Update app.py
app.py
CHANGED
@@ -14,28 +14,24 @@ from pydantic import BaseModel
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
-global_data = {'models': {}, 'tokens': {'
-
+global_data = {'models': {}, 'tokens': {k: k + '_token' for k in ['eos', 'pad', 'padding', 'unk', 'bos', 'sep', 'cls', 'mask']}}
 model_configs = [{"repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF", "filename": "mergekit-ties-tzamfyy-q2_k.gguf", "name": "my_model"}]
-
 models = {}
 
 def load_model(model_config):
     model_name = model_config['name']
-
-
-
-
-
-
-
-
-    return None
+    try:
+        model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
+        models[model_name] = model
+        global_data['models'] = models
+        return model
+    except Exception as e:
+        print(f"Error loading model {model_name}: {e}")
+        return None
 
 for config in model_configs:
     model = load_model(config)
     if model is None:
-        print(f"Failed to load model {config['name']}. Exiting.")
         exit(1)
 
 class ChatRequest(BaseModel):
@@ -45,21 +41,14 @@ def normalize_input(input_text):
     return input_text.strip()
 
 def remove_duplicates(text):
-    lines = text.split('\n')
-
-    seen_lines = set()
-    for line in lines:
-        line = line.strip()
-        if line and line not in seen_lines:
-            unique_lines.append(line)
-            seen_lines.add(line)
-    return '\n'.join(unique_lines)
+    lines = [line.strip() for line in text.split('\n') if line.strip()]
+    return '\n'.join(dict.fromkeys(lines))
 
 def generate_model_response(model, inputs):
     try:
         if model is None:
             return "Model loading failed."
-        response = model(inputs, max_tokens
+        response = model(inputs, max_tokens=1000) #Reduced chunk size for safety
         return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
         print(f"Error generating response: {e}")
@@ -68,53 +57,39 @@ def generate_model_response(model, inputs):
 app = FastAPI()
 origins = ["*"]
 app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
+    CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
 )
 
 @app.post("/generate")
 async def generate(request: ChatRequest):
     inputs = normalize_input(request.message)
-    chunk_size =
+    chunk_size = 400 # Reduced chunk size further for this model
     chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), chunk_size)]
     overall_response = ""
+
     for chunk in chunks:
         with ThreadPoolExecutor() as executor:
             futures = [executor.submit(generate_model_response, model, chunk) for model in models.values()]
-            responses = [{'model':
-
-
-            for response in responses:
-                if response['model'] not in unique_responses and response['response']:
-                    unique_responses[response['model']] = response['response']
-
-            chunk_response = ""
-            for model, response in unique_responses.items():
-                chunk_response += f"**{model}:**\n{response}\n\n"
-            overall_response += chunk_response
-
+            responses = [{'model': name, 'response': future.result()} for name, future in zip(models, as_completed(futures))]
+            for response in responses: #Simplified response processing
+                overall_response += f"**{response['model']}:**\n{response['response']}\n\n"
     return {"response": overall_response}
 
 async def process_message(message, history):
     try:
-        port =
+        port = os.environ.get("PORT", 7860)
         response = requests.post(f"http://localhost:{port}/generate", json={"message": message}).json()
         formatted_response = response["response"]
         history.append((message, formatted_response))
         return history, history
     except requests.exceptions.RequestException as e:
-        return history, f"Error
-
+        return history, f"Error: {e}"
 
 iface = gr.Interface(
     fn=process_message,
     inputs=[gr.Textbox(lines=2, placeholder="Enter your message here..."), gr.State([])],
     outputs=[gr.Chatbot(), gr.State([])],
-    title="Multi-Model LLM API",
-    description="Enter a message and get responses from multiple LLMs.",
+    title="Multi-Model LLM API", description="Enter a message and get responses from multiple LLMs."
 )
 
 if __name__ == "__main__":
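The rewritten remove_duplicates relies on dict.fromkeys preserving insertion order (Python 3.7+), so repeated lines are dropped while the first occurrence of each is kept. A minimal sketch of the same idea in isolation, using made-up sample text:

# Order-preserving de-duplication with dict.fromkeys, as used by the new
# remove_duplicates above; the sample text is illustrative only.
text = "alpha\nbeta\nalpha\n\nbeta\ngamma"
lines = [line.strip() for line in text.split('\n') if line.strip()]
print('\n'.join(dict.fromkeys(lines)))  # prints: alpha, beta, gamma (one per line)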
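The new /generate handler fans each chunk out to every loaded model with a ThreadPoolExecutor and pairs model names with results via zip(models, as_completed(futures)). Since as_completed yields futures in completion order rather than submission order, a common alternative is to key each future by its model name up front. The sketch below is a hypothetical illustration of that pattern, not part of the commit; it reuses the models dict and generate_model_response function defined in app.py.

# Hypothetical sketch: submit one generation task per model and keep each
# result attached to the model that produced it, regardless of the order
# in which the futures finish.
from concurrent.futures import ThreadPoolExecutor, as_completed

def fan_out(models, chunk):
    results = {}
    with ThreadPoolExecutor() as executor:
        # Map each submitted future back to the name of the model it runs.
        future_to_name = {
            executor.submit(generate_model_response, model, chunk): name
            for name, model in models.items()
        }
        for future in as_completed(future_to_name):
            results[future_to_name[future]] = future.result()
    return results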
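The Gradio callback in the diff already queries the endpoint over HTTP; for completeness, a standalone sketch of calling the same /generate route is shown below. The payload shape ({"message": ...}) and the "response" key come from the ChatRequest model and handler above; the port default mirrors process_message, and the timeout value is an assumption.

# Standalone sketch of calling the /generate endpoint, assuming the FastAPI
# app above is running locally; 7860 is the default port used by
# process_message, and the timeout is an arbitrary assumption.
import requests

def query_generate(message, port=7860, timeout=120):
    resp = requests.post(
        f"http://localhost:{port}/generate",
        json={"message": message},
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.json()["response"]

if __name__ == "__main__":
    print(query_generate("Hello from the client sketch."))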