asasasText

Runtime error

App Files Files Community

Hjgugugjhuhjggg committed on Nov 23, 2024

Commit

f5bef42

verified ·

1 Parent(s): 2784732

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -54

app.py CHANGED Viewed

@@ -1,78 +1,132 @@
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
-import re
-import uvicorn
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-import os
 from dotenv import load_dotenv
 from pydantic import BaseModel
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
-global_data = {'models': {}, 'tokens': {k: k + '_token' for k in ['eos', 'pad', 'padding', 'unk', 'bos', 'sep', 'cls', 'mask']}}
-model_configs = [{"repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF", "filename": "mergekit-ties-tzamfyy-q2_k.gguf", "name": "my_model"}]
-models = {}
-def load_model(model_config):
-    model_name = model_config['name']
-    try:
-        model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
-        models[model_name] = model
-        global_data['models'] = models
-        return model
-    except Exception as e:
-        print(f"Error loading model {model_name}: {e}")
-        return None
-for config in model_configs:
-    model = load_model(config)
-    if model is None:
-        exit(1)
 class ChatRequest(BaseModel):
     message: str
-def normalize_input(input_text):
-    return input_text.strip()
-def remove_duplicates(text):
-    lines = [line.strip() for line in text.split('\n') if line.strip()]
-    return '\n'.join(dict.fromkeys(lines))
-def generate_model_response(model, inputs):
     try:
-        if model is None:
-            return "Model loading failed."
-        response = model(inputs, max_tokens=512)
-        return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
-        print(f"Error generating response: {e}")
-        return f"Error: {e}"
-app = FastAPI()
-origins = ["*"]
-app.add_middleware(
-    CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
-)
-@app.post("/generate")
-async def generate(request: ChatRequest):
-    inputs = normalize_input(request.message)
-    chunk_size = 400
-    chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), chunk_size)]
-    overall_response = ""
-    for chunk in chunks:
-        with ThreadPoolExecutor() as executor:
-            futures = [executor.submit(generate_model_response, model, chunk) for model in models.values()]
-            responses = [{'model': name, 'response': future.result()} for name, future in zip(models, as_completed(futures))]
-        for response in responses:
-            overall_response += f"**{response['model']}:**\n{response['response']}\n\n"
-    return {"response": overall_response}
 if __name__ == "__main__":
-    port = int(os.environ.get("PORT", 7860))
     uvicorn.run(app, host="0.0.0.0", port=port)

+import os
+import gc
+import io
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from fastapi import FastAPI, Request, HTTPException
+from fastapi.responses import JSONResponse
+from tqdm import tqdm
 from dotenv import load_dotenv
 from pydantic import BaseModel
+from huggingface_hub import hf_hub_download, login
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import nltk
+# Download the NLTK 'punkt' and 'stopwords' resources when the module is
+# imported (presumably for the word_tokenize / stopwords imports above).
+nltk.download('punkt')
+nltk.download('stopwords')
+# Load variables from a .env file, if any, before the environment is read.
 load_dotenv()
+# The FastAPI application that the route and event decorators below attach to.
+app = FastAPI()
+# Optional Hugging Face token; when present, log the hub client in with it.
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
+if HUGGINGFACE_TOKEN:
+    login(token=HUGGINGFACE_TOKEN)
+# Module-level shared state read by ModelManager and process_message.
+global_data = {
+    # One entry per model to load: the Hugging Face repo id and the display
+    # name that is used as the key for the loaded model and its response.
+    'model_configs': [
+        {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "name": "GPT-2 XL"},
+        {"repo_id": "Ffftdtd5dtft/gemma-2-27b-Q2_K-GGUF", "name": "Gemma 2-27B"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "name": "Phi-3 Mini 128K Instruct"},
+        {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "name": "Starcoder2 3B"},
+        {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "name": "Qwen2 1.5B Instruct"},
+        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "name": "Mistral Nemo Instruct 2407"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "name": "Phi 3 Mini 128K Instruct XXS"},
+        {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "name": "TinyLlama 1.1B Chat"},
+        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Q2_K-GGUF", "name": "Meta Llama 3.1-8B"},
+        {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "name": "Codegemma 2B"},
+    ],
+    # In-memory text buffer.
+    # NOTE(review): nothing in the code shown here reads or writes it.
+    'training_data': io.StringIO(),
+}
+class ModelManager:
+    """Load every configured GGUF model once and serve lookups by name.
+
+    ``self.models`` maps each config's ``name`` to the loaded ``Llama``
+    instance, or to ``None`` when loading that model failed.
+    """
+
+    def __init__(self):
+        self.models = {}
+        self.load_models()
+
+    def load_models(self):
+        """Load each entry of ``global_data['model_configs']`` not loaded yet.
+
+        A failure to load one model is reported and recorded as ``None`` so
+        that the remaining models are still attempted.
+        """
+        for config in tqdm(global_data['model_configs'], desc="Loading models"):
+            model_name = config['name']
+            if model_name in self.models:
+                continue
+            try:
+                # The configs carry no file name, so the GGUF file is matched
+                # by glob; an optional "filename" key overrides the pattern.
+                # Hub authentication relies on the module-level login() call.
+                self.models[model_name] = Llama.from_pretrained(
+                    repo_id=config['repo_id'],
+                    filename=config.get('filename', '*.gguf'),
+                )
+            except Exception as e:
+                # Deliberate best effort, but never silent: without this
+                # message a broken config is indistinguishable from success.
+                print(f"Error loading model {model_name}: {e}")
+                self.models[model_name] = None
+            finally:
+                gc.collect()
+
+    def get_model(self, model_name: str):
+        """Return the model stored under ``model_name``, or ``None``.
+
+        ``None`` is returned both for unknown names and for models whose
+        loading failed.
+        """
+        return self.models.get(model_name)
+model_manager = ModelManager()
 class ChatRequest(BaseModel):
+    """Request body schema carrying a single ``message`` string.
+
+    NOTE(review): the endpoint visible in this revision parses the raw JSON
+    body itself and does not reference this model.
+    """
     message: str
+async def generate_model_response(model, inputs: str) -> str:
+    """Ask one model to complete ``inputs`` and return the stripped text.
+
+    Args:
+        model: Callable invoked as ``model(inputs, max_tokens=150)``; a falsy
+            value means the model is unavailable.
+        inputs: Prompt text passed to the model unchanged.
+
+    Returns:
+        The first choice's text with surrounding whitespace removed,
+        ``"Model not loaded"`` for a falsy ``model``, or an ``"Error: ..."``
+        string when anything raises. This function never raises itself.
+    """
+    try:
+        if not model:
+            return "Model not loaded"
+        completion = model(inputs, max_tokens=150)
+        first_choice = completion['choices'][0]
+        return first_choice['text'].strip()
+    except Exception as err:
+        return f"Error: Could not generate a response. Details: {err}"
+async def process_message(message: str) -> dict:
+    """Query every loaded model and pick the answer most similar to the prompt.
+
+    Args:
+        message: The user's prompt.
+
+    Returns:
+        ``{"best_response": {"model": name, "text": text},
+        "all_responses": {name: text, ...}}`` where the best response has the
+        highest TF-IDF cosine similarity to ``message``.
+
+    Raises:
+        RuntimeError: If no configured model is loaded.
+    """
+    # Local import: the module's import block does not provide asyncio.
+    import asyncio
+
+    inputs = message.strip()
+
+    # Keep only the models that actually loaded, keyed by display name.
+    loaded = {}
+    for config in global_data['model_configs']:
+        model = model_manager.get_model(config['name'])
+        if model:
+            loaded[config['name']] = model
+    if not loaded:
+        raise RuntimeError("No models are loaded")
+
+    responses = {}
+    with ThreadPoolExecutor(max_workers=len(loaded)) as executor:
+        # generate_model_response is a coroutine function, so each worker
+        # thread drives its coroutine to completion with asyncio.run.
+        # Futures are mapped back to their model name because as_completed
+        # yields them in completion order, not submission order.
+        # NOTE(review): as_completed blocks the event loop while waiting.
+        future_to_name = {
+            executor.submit(asyncio.run, generate_model_response(model, inputs)): name
+            for name, model in loaded.items()
+        }
+        for future in tqdm(as_completed(future_to_name), total=len(future_to_name), desc="Generating responses"):
+            model_name = future_to_name[future]
+            try:
+                responses[model_name] = future.result()
+            except Exception as e:
+                responses[model_name] = f"Error processing {model_name}: {e}"
+
+    model_names = list(responses)
+    response_texts = list(responses.values())
+    best_response_index = 0
+    try:
+        # stop_words is passed as a list; scikit-learn does not accept a set.
+        vectorizer = TfidfVectorizer(tokenizer=word_tokenize, stop_words=stopwords.words('english'))
+        tfidf_matrix = vectorizer.fit_transform([message] + response_texts)
+        similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])
+        best_response_index = int(similarities.argmax())
+    except ValueError:
+        # Raised for an empty vocabulary (e.g. only stop words everywhere);
+        # no ranking is possible, so fall back to the first response.
+        best_response_index = 0
+
+    best_response_model = model_names[best_response_index]
+    best_response_text = response_texts[best_response_index]
+    return {"best_response": {"model": best_response_model, "text": best_response_text}, "all_responses": responses}
+@app.post("/generate_multimodel")
+async def api_generate_multimodel(request: Request):
+    """Run the prompt from a JSON body ``{"message": "..."}`` through all models.
+
+    Returns the ``process_message`` result as JSON. A body that is not valid
+    JSON, is not a JSON object, or lacks a non-empty string ``message`` is a
+    client error (400); a failure while generating is reported as
+    ``{"error": ...}`` with status 500.
+    """
     try:
+        data = await request.json()
+    except ValueError:
+        # JSON decoding errors are ValueError subclasses: the client sent a
+        # malformed body, which is not a server fault.
+        raise HTTPException(status_code=400, detail="Invalid JSON body") from None
+    message = data.get("message") if isinstance(data, dict) else None
+    if not message or not isinstance(message, str):
+        raise HTTPException(status_code=400, detail="Missing message")
+    try:
+        response = await process_message(message)
+        return JSONResponse(response)
     except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+@app.on_event("startup")
+async def startup_event():
+    """Startup hook registered with the app; currently does nothing."""
+    pass
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Shutdown hook registered with the app; runs a garbage collection pass."""
+    gc.collect()
 if __name__ == "__main__":
+    # uvicorn is not imported anywhere in this revision, so import it here;
+    # otherwise the call below fails with NameError.
+    import uvicorn
+
+    # Port comes from the PORT environment variable, defaulting to 8000.
+    # NOTE(review): the previous revision defaulted to 7860 - confirm which
+    # port the hosting platform expects.
+    port = int(os.environ.get("PORT", 8000))
     uvicorn.run(app, host="0.0.0.0", port=port)