Update app.py
app.py CHANGED
@@ -15,7 +15,9 @@ load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
 global_data = {'models': {}, 'tokens': {k: k + '_token' for k in ['eos', 'pad', 'padding', 'unk', 'bos', 'sep', 'cls', 'mask']}}
+
 model_configs = [{"repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF", "filename": "mergekit-ties-tzamfyy-q2_k.gguf", "name": "my_model"}]
+
 models = {}
 
 def load_model(model_config):
@@ -48,7 +50,7 @@ def generate_model_response(model, inputs):
     try:
         if model is None:
             return "Model loading failed."
-        response = model(inputs, max_tokens=
+        response = model(inputs, max_tokens=512) #max_tokens adjusted for practicality
         return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
         print(f"Error generating response: {e}")
@@ -63,15 +65,14 @@ app.add_middleware(
 @app.post("/generate")
 async def generate(request: ChatRequest):
     inputs = normalize_input(request.message)
-    chunk_size = 400
+    chunk_size = 400 #Reduced chunk size
     chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), chunk_size)]
     overall_response = ""
-
     for chunk in chunks:
         with ThreadPoolExecutor() as executor:
             futures = [executor.submit(generate_model_response, model, chunk) for model in models.values()]
             responses = [{'model': name, 'response': future.result()} for name, future in zip(models, as_completed(futures))]
-        for response in responses:
+            for response in responses:
                 overall_response += f"**{response['model']}:**\n{response['response']}\n\n"
     return {"response": overall_response}
 
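
The diff only shows the signature of load_model, so for orientation here is a minimal sketch of how a model_configs entry like the one above is usually consumed with llama-cpp-python (the .gguf filename strongly suggests that backend). Llama.from_pretrained, the n_ctx value, and the error handling are assumptions for illustration, not the Space's actual code:

# Hypothetical sketch; the real load_model body is not part of this commit.
from llama_cpp import Llama  # llama-cpp-python, assumed to be in the Space's requirements

def load_model(model_config):
    # Download the GGUF file from the Hub repo and return a Llama instance,
    # or None so generate_model_response() can fall back to "Model loading failed."
    try:
        return Llama.from_pretrained(
            repo_id=model_config["repo_id"],
            filename=model_config["filename"],
            n_ctx=2048,      # assumed context window, not visible in the diff
            verbose=False,
        )
    except Exception as e:
        print(f"Error loading {model_config['name']}: {e}")
        return None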
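
One subtlety in the /generate handler worth flagging: zip(models, as_completed(futures)) pairs model names with futures in whatever order they happen to finish, so with more than one configured model a response can be attributed to the wrong name. Below is a small sketch of the same per-chunk fan-out that keys each future by its model name instead; the fan_out helper is hypothetical and reuses the models dict and generate_model_response from app.py:

from concurrent.futures import ThreadPoolExecutor, as_completed

def fan_out(chunk):
    # Submit the chunk to every loaded model and collect results keyed by model name,
    # so attribution does not depend on completion order.
    with ThreadPoolExecutor() as executor:
        future_to_name = {
            executor.submit(generate_model_response, model, chunk): name
            for name, model in models.items()
        }
        return [
            {"model": future_to_name[future], "response": future.result()}
            for future in as_completed(future_to_name)
        ]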
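
Finally, a hedged example of exercising the endpoint once the Space builds; the port (7860 is the usual Spaces default) and the single message field of ChatRequest are inferred from the handler above rather than confirmed by this diff:

import requests  # any HTTP client works; requests is assumed here

resp = requests.post(
    "http://localhost:7860/generate",
    json={"message": "Say hello from each model."},
)
print(resp.json()["response"])  # concatenated per-model answers, formatted as "**name:** ..."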