Hjgugugjhuhjggg committed on
Commit fcf1090 · verified · 1 Parent(s): 5b9daa3

Update app.py

Files changed (1)
  1. app.py +36 -27
app.py CHANGED
@@ -9,18 +9,13 @@ from tqdm import tqdm
 from dotenv import load_dotenv
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download, login
-from nltk.tokenize import word_tokenize
-from nltk.corpus import stopwords
+import spacy
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
-import nltk
 import uvicorn
 import psutil
 import torch

-nltk.download('punkt')
-nltk.download('stopwords')
-
 load_dotenv()

 app = FastAPI()
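
The NLTK imports and both runtime nltk.download() calls are replaced by a single import spacy. One practical consequence: spaCy fetches nothing at runtime, so the en_core_web_sm pipeline loaded later in process_message must already be installed. A minimal setup sketch, assuming spaCy's own downloader (run once, outside app.py):

import spacy.cli
spacy.cli.download("en_core_web_sm")  # one-time install of the small English pipeline
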
@@ -29,19 +24,27 @@ if HUGGINGFACE_TOKEN:
     login(token=HUGGINGFACE_TOKEN)

 global_data = {
-    'model_configs': [
-        {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "name": "GPT-2 XL"},
-        {"repo_id": "Ffftdtd5dtft/gemma-2-27b-Q2_K-GGUF", "name": "Gemma 2-27B"},
-        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "name": "Phi-3 Mini 128K Instruct"},
-        {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "name": "Starcoder2 3B"},
-        {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "name": "Qwen2 1.5B Instruct"},
-        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "name": "Mistral Nemo Instruct 2407"},
-        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "name": "Phi 3 Mini 128K Instruct XXS"},
-        {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "name": "TinyLlama 1.1B Chat"},
-        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Q2_K-GGUF", "name": "Meta Llama 3.1-8B"},
-        {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "name": "Codegemma 2B"},
-        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-70B-Instruct-Q2_K-GGUF", "name": "Meta Llama 3.1-70B Instruct"},
-    ],
+    'model_configs': [
+        {"repo_id": "Ffftdtd5dtft/gpt2-xl-Q2_K-GGUF", "filename": "gpt2-xl-q2_k.gguf", "name": "GPT-2 XL"},
+        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Instruct-Q2_K-GGUF", "filename": "meta-llama-3.1-8b-instruct-q2_k.gguf", "name": "Meta Llama 3.1-8B Instruct"},
+        {"repo_id": "Ffftdtd5dtft/gemma-2-9b-it-Q2_K-GGUF", "filename": "gemma-2-9b-it-q2_k.gguf", "name": "Gemma 2-9B IT"},
+        {"repo_id": "Ffftdtd5dtft/gemma-2-27b-Q2_K-GGUF", "filename": "gemma-2-27b-q2_k.gguf", "name": "Gemma 2-27B"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "filename": "phi-3-mini-128k-instruct-q2_k.gguf", "name": "Phi-3 Mini 128K Instruct"},
+        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-8B-Q2_K-GGUF", "filename": "meta-llama-3.1-8b-q2_k.gguf", "name": "Meta Llama 3.1-8B"},
+        {"repo_id": "Ffftdtd5dtft/Qwen2-7B-Instruct-Q2_K-GGUF", "filename": "qwen2-7b-instruct-q2_k.gguf", "name": "Qwen2 7B Instruct"},
+        {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "filename": "starcoder2-3b-q2_k.gguf", "name": "Starcoder2 3B"},
+        {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "filename": "qwen2-1.5b-instruct-q2_k.gguf", "name": "Qwen2 1.5B Instruct"},
+        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-70B-Q2_K-GGUF", "filename": "meta-llama-3.1-70b-q2_k.gguf", "name": "Meta Llama 3.1-70B"},
+        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "filename": "mistral-nemo-instruct-2407-q2_k.gguf", "name": "Mistral Nemo Instruct 2407"},
+        {"repo_id": "Ffftdtd5dtft/Hermes-3-Llama-3.1-8B-IQ1_S-GGUF", "filename": "hermes-3-llama-3.1-8b-iq1_s-imat.gguf", "name": "Hermes 3 Llama 3.1-8B"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3.5-mini-instruct-Q2_K-GGUF", "filename": "phi-3.5-mini-instruct-q2_k.gguf", "name": "Phi 3.5 Mini Instruct"},
+        {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-70B-Instruct-Q2_K-GGUF", "filename": "meta-llama-3.1-70b-instruct-q2_k.gguf", "name": "Meta Llama 3.1-70B Instruct"},
+        {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "filename": "codegemma-2b-iq1_s-imat.gguf", "name": "Codegemma 2B"},
+        {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "filename": "phi-3-mini-128k-instruct-iq2_xxs-imat.gguf", "name": "Phi 3 Mini 128K Instruct XXS"},
+        {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "filename": "tinyllama-1.1b-chat-v1.0-iq1_s-imat.gguf", "name": "TinyLlama 1.1B Chat"},
+        {"repo_id": "Ffftdtd5dtft/Mistral-NeMo-Minitron-8B-Base-IQ1_S-GGUF", "filename": "mistral-nemo-minitron-8b-base-iq1_s-imat.gguf", "name": "Mistral NeMo Minitron 8B Base"},
+        {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "filename": "mistral-nemo-instruct-2407-q2_k.gguf", "name": "Mistral Nemo Instruct 2407"}
+    ],
     'training_data': io.StringIO(),
 }
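
(Note: the committed line reads 'model_configs' = [, which is a SyntaxError inside a dict literal; it is corrected to 'model_configs': [ above.) Each model entry now carries an explicit "filename" alongside "repo_id", which is exactly the pair that hf_hub_download (imported at the top of app.py) takes to fetch a single GGUF file. A sketch of how these configs could be resolved to local paths; download_model_files is an illustrative name, not a function in app.py:

from huggingface_hub import hf_hub_download

def download_model_files(model_configs):
    # hf_hub_download caches each file locally and returns its path
    return {
        config["name"]: hf_hub_download(repo_id=config["repo_id"], filename=config["filename"])
        for config in model_configs
    }
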
 
@@ -95,8 +98,14 @@ async def process_message(message: str) -> dict:
         except Exception as e:
             responses[model_name] = f"Error processing {model_name}: {e}"

-    stop_words = set(stopwords.words('english'))
-    vectorizer = TfidfVectorizer(tokenizer=word_tokenize, stop_words=stop_words)
+    nlp = spacy.load("en_core_web_sm")
+    stop_words = spacy.lang.en.stop_words.STOP_WORDS
+
+    def custom_tokenizer(text):
+        doc = nlp(text)
+        return [token.lemma_.lower() for token in doc if not token.is_stop and not token.is_punct]
+
+    vectorizer = TfidfVectorizer(tokenizer=custom_tokenizer)
     reference_text = message
     response_texts = list(responses.values())
     tfidf_matrix = vectorizer.fit_transform([reference_text] + response_texts)
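
The scoring path now lemmatizes with spaCy and drops stop words and punctuation inside the tokenizer itself (the new stop_words variable is assigned but no longer passed to TfidfVectorizer; filtering happens via token.is_stop). A self-contained sketch of the same idea, carried through the cosine-similarity step that follows in the unchanged lines; pick_best_response is a hypothetical name:

import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nlp = spacy.load("en_core_web_sm")

def custom_tokenizer(text):
    # lowercased lemmas, minus stop words and punctuation
    return [t.lemma_.lower() for t in nlp(text) if not t.is_stop and not t.is_punct]

def pick_best_response(message, responses):
    vectorizer = TfidfVectorizer(tokenizer=custom_tokenizer)
    matrix = vectorizer.fit_transform([message] + list(responses.values()))
    # row 0 is the reference message; score it against every response row
    scores = cosine_similarity(matrix[0:1], matrix[1:]).flatten()
    best = int(scores.argmax())
    return list(responses.keys())[best], float(scores[best])
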
@@ -136,13 +145,13 @@ def release_resources():
         torch.cuda.empty_cache()
         gc.collect()
     except Exception as e:
-        print(f"Failed to release resources: {e}")
+        pass

 def resource_manager():
-    MAX_RAM_PERCENT = 20 #Increased to a more reasonable value
-    MAX_CPU_PERCENT = 20 #Increased to a more reasonable value
-    MAX_GPU_PERCENT = 20 #Increased to a more reasonable value
-    MAX_RAM_MB = 2048 #Increased to a more reasonable value
+    MAX_RAM_PERCENT = 20
+    MAX_CPU_PERCENT = 20
+    MAX_GPU_PERCENT = 20
+    MAX_RAM_MB = 2048

     while True:
         try:
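
The resource limits become bare constants and the explanatory comments are dropped. The body of the while True: loop is outside this diff, so the following is only a guess at the kind of check it performs with psutil, which app.py imports:

import psutil

MAX_RAM_PERCENT = 20
MAX_RAM_MB = 2048

def ram_over_budget():
    # psutil.virtual_memory() reports system-wide usage; percent, total, and
    # available are real fields of the returned namedtuple
    mem = psutil.virtual_memory()
    used_mb = (mem.total - mem.available) / (1024 * 1024)
    return mem.percent > MAX_RAM_PERCENT or used_mb > MAX_RAM_MB
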
@@ -165,7 +174,7 @@ def resource_manager():
             release_resources()

         except Exception as e:
-            print(f"Error in resource manager: {e}")
+            pass

 if __name__ == "__main__":
     import threading
 
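
The __main__ block imports threading, presumably to run resource_manager in the background while uvicorn serves the app. The rest of the block is not shown in this diff, so this is a plausible reconstruction, not the committed code (port 7860 is an assumption, the Hugging Face Spaces default):

if __name__ == "__main__":
    import threading
    # daemon=True so the monitor thread exits with the main process
    threading.Thread(target=resource_manager, daemon=True).start()
    uvicorn.run(app, host="0.0.0.0", port=7860)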