Update app.py
app.py
CHANGED
@@ -15,6 +15,10 @@ import asyncio
 import json
 import logging
 from huggingface_hub import login
+from dotenv import load_dotenv
+import huggingface_hub
+
+load_dotenv()
 
 GCS_BUCKET_NAME = os.getenv("GCS_BUCKET_NAME")
 GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
@@ -23,6 +27,9 @@ HUGGINGFACE_HUB_TOKEN = os.getenv("HF_API_TOKEN")
 if HUGGINGFACE_HUB_TOKEN:
     login(token=HUGGINGFACE_HUB_TOKEN)
 
+os.system("git config --global credential.helper store")
+huggingface_hub.login(token=HUGGINGFACE_HUB_TOKEN, add_to_git_credential=True)
+
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
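Note: with load_dotenv() in place, these variables are presumably read from a local .env file. The GCS client wiring sits outside this diff; a minimal sketch of how GOOGLE_APPLICATION_CREDENTIALS_JSON and GCS_BUCKET_NAME are typically consumed (the variable names come from the code above, the rest is assumed):

import json
import os
from google.cloud import storage
from google.oauth2 import service_account

# Sketch, not part of the commit: build the `bucket` object used by GCSModelLoader.
creds_info = json.loads(os.environ["GOOGLE_APPLICATION_CREDENTIALS_JSON"])
credentials = service_account.Credentials.from_service_account_info(creds_info)
client = storage.Client(credentials=credentials, project=creds_info.get("project_id"))
bucket = client.bucket(os.environ["GCS_BUCKET_NAME"])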
@@ -71,7 +78,7 @@ class GCSModelLoader:
         self.bucket = bucket
 
     def _get_gcs_uri(self, model_name):
-
+        return f"{model_name}"
 
     def _blob_exists(self, blob_path):
         blob = self.bucket.blob(blob_path)
@@ -87,20 +94,27 @@ class GCSModelLoader:
         blob = self.bucket.blob(blob_path)
         blob.upload_from_string(content)
 
+    def _create_model_folder(self, model_name):
+        gcs_model_folder = self._get_gcs_uri(model_name)
+        if not self._blob_exists(f"{gcs_model_folder}/.touch"):
+            blob = self.bucket.blob(f"{gcs_model_folder}/.touch")
+            blob.upload_from_string("")
+            logger.info(f"Created folder '{gcs_model_folder}' in GCS.")
+
     def load_config(self, model_name):
-
         gcs_config_path = f"{self._get_gcs_uri(model_name)}/config.json"
         config_content = self._download_content(gcs_config_path)
         if config_content:
             try:
                 return AutoConfig.from_pretrained(pretrained_model_name_or_path=None, trust_remote_code=True, config_dict=json.loads(config_content), token=HUGGINGFACE_HUB_TOKEN)
             except Exception as e:
-
-
+                logger.error(f"Error loading config from GCS: {e}")
+                return None
         else:
             try:
                 config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, token=HUGGINGFACE_HUB_TOKEN)
                 gcs_model_folder = self._get_gcs_uri(model_name)
+                self._create_model_folder(model_name)
                 self._upload_content(json.dumps(config.to_dict()).encode('utf-8'), f"{gcs_model_folder}/config.json")
                 return config
             except Exception as e:
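Note: GCS has no real folders; _create_model_folder simulates one by writing an empty .touch marker object under the model prefix, and the _blob_exists check makes repeat calls no-ops. A quick usage sketch (bucket name hypothetical):

from google.cloud import storage

client = storage.Client()  # assumes default application credentials
bucket = client.bucket("my-model-cache")  # hypothetical bucket
loader = GCSModelLoader(bucket)
loader._create_model_folder("gpt2")  # uploads an empty object at gpt2/.touch on first call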
@@ -114,7 +128,7 @@ class GCSModelLoader:
 
         if gcs_files_exist:
             try:
-                return AutoTokenizer.from_pretrained(gcs_tokenizer_path, trust_remote_code=True,token=HUGGINGFACE_HUB_TOKEN)
+                return AutoTokenizer.from_pretrained(gcs_tokenizer_path, trust_remote_code=True, token=HUGGINGFACE_HUB_TOKEN)
             except Exception as e:
                 logger.error(f"Error loading tokenizer from GCS: {e}")
                 return None
@@ -122,9 +136,8 @@ class GCSModelLoader:
         try:
             tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=HUGGINGFACE_HUB_TOKEN)
             gcs_model_folder = self._get_gcs_uri(model_name)
-
-
-                    self._upload_content(f.read(), f"{gcs_model_folder}/{filename}")
+            self._create_model_folder(model_name)
+            tokenizer.save_pretrained(gcs_model_folder)
             return tokenizer
         except Exception as e:
             logger.error(f"Error loading tokenizer from Hugging Face and saving to GCS: {e}")
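Note: tokenizer.save_pretrained(gcs_model_folder) writes to a local directory named after the model, not into the bucket; the old per-file _upload_content loop is gone. If the files are still meant to land in GCS, an explicit upload pass would be needed — a sketch, not part of this commit:

import os

def upload_dir_to_gcs(bucket, local_dir, gcs_prefix):
    # Mirror every file written by save_pretrained into the bucket.
    for filename in os.listdir(local_dir):
        path = os.path.join(local_dir, filename)
        if os.path.isfile(path):
            with open(path, "rb") as f:
                bucket.blob(f"{gcs_prefix}/{filename}").upload_from_string(f.read())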
@@ -145,9 +158,8 @@ class GCSModelLoader:
         try:
             model = AutoModelForCausalLM.from_pretrained(model_name, config=config, trust_remote_code=True, token=HUGGINGFACE_HUB_TOKEN)
             gcs_model_folder = self._get_gcs_uri(model_name)
-
-
-                    self._upload_content(f.read(), f"{gcs_model_folder}/{filename}")
+            self._create_model_folder(model_name)
+            model.save_pretrained(gcs_model_folder)
             return model
         except Exception as e:
             logger.error(f"Error loading model from Hugging Face and saving to GCS: {e}")
@@ -157,19 +169,19 @@ model_loader = GCSModelLoader(bucket)
 
 async def generate_stream(model, tokenizer, input_text, generation_config):
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-    generation_stream = model.generate(
-        **inputs,
-        generation_config=generation_config,
-        stream=True,
-    )
+
     async def token_stream():
-
-
-
-
-
-
+        generation_stream = model.generate(
+            **inputs,
+            generation_config=generation_config,
+            stream=True,
+        )
+        async for output in generation_stream:
+            token_id = output[-1]
+            token = tokenizer.decode(token_id, skip_special_tokens=True)
+            yield {"token": token}
 
+    return token_stream()
 
 def generate_non_stream(model, tokenizer, input_text, generation_config):
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
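Note: model.generate(..., stream=True) is not a standard transformers API, and generate does not return an async iterator, so the block above presumably relies on a custom or patched generate. The stock way to stream tokens is TextIteratorStreamer with generation on a worker thread — a sketch of that alternative, not what the commit uses:

from threading import Thread
from transformers import TextIteratorStreamer

def generate_stream_alt(model, tokenizer, input_text, generation_config):
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so it runs on a thread while the streamer yields decoded text.
    Thread(target=model.generate,
           kwargs={**inputs, "generation_config": generation_config, "streamer": streamer}).start()
    for token in streamer:  # blocking iterator; wrap with run_in_executor for async handlers
        yield {"token": token}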
@@ -191,7 +203,7 @@ async def generate(request: GenerateRequest):
     try:
         gcs_model_folder_uri = model_loader._get_gcs_uri(model_name)
         if not model_loader._blob_exists(f"{gcs_model_folder_uri}/config.json"):
-
+            logger.info(f"Model '{model_name}' not found in GCS, checking Hugging Face.")
 
         config = model_loader.load_config(model_name)
         if not config:
@@ -199,21 +211,17 @@ async def generate(request: GenerateRequest):
 
         tokenizer = model_loader.load_tokenizer(model_name)
         if not tokenizer:
-
+            raise HTTPException(status_code=400, detail="Tokenizer could not be loaded.")
 
         generation_config_kwargs = generation_params.copy()
-
-
-
-
-
-        generation_config_kwargs['sep_token_id'] = tokenizer.sep_token_id
-        if hasattr(tokenizer, 'unk_token_id') and tokenizer.unk_token_id is not None:
-            generation_config_kwargs['unk_token_id'] = tokenizer.unk_token_id
-
+        generation_config_kwargs['pad_token_id'] = tokenizer.pad_token_id
+        generation_config_kwargs['eos_token_id'] = tokenizer.eos_token_id
+        generation_config_kwargs['sep_token_id'] = tokenizer.sep_token_id
+        generation_config_kwargs['unk_token_id'] = tokenizer.unk_token_id
+
         model = model_loader.load_model(model_name, config)
         if not model:
-
+            raise HTTPException(status_code=400, detail="Model could not be loaded.")
 
         generation_config = GenerationConfig.from_pretrained(
             model_name,
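Note: the old code guarded the sep/unk assignments with hasattr checks; the new code assigns all four token ids unconditionally, so tokenizers lacking one of them will put None into the kwargs. A defensive variant (an alternative sketch, not what the commit does):

for name in ("pad_token_id", "eos_token_id", "sep_token_id", "unk_token_id"):
    token_id = getattr(tokenizer, name, None)
    if token_id is not None:
        generation_config_kwargs[name] = token_id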
@@ -223,7 +231,11 @@ async def generate(request: GenerateRequest):
 
         if task_type == "text-to-text":
             if stream:
-
+                async def event_stream():
+                    async for output in generate_stream(model, tokenizer, input_text, generation_config):
+                        yield f"data: {json.dumps(output)}\n\n"
+                        await asyncio.sleep(request.chunk_delay)
+                return StreamingResponse(event_stream(), media_type="text/event-stream")
             else:
                 text_result = generate_non_stream(model, tokenizer, input_text, generation_config)
                 return {"text": text_result}
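Note: the endpoint now returns server-sent events; a client can consume the stream like this (route and request fields are assumptions, since GenerateRequest is defined outside this diff):

import json
import requests

resp = requests.post(
    "http://localhost:8000/generate",  # hypothetical route
    json={"model_name": "gpt2", "input_text": "Hello", "task_type": "text-to-text", "stream": True},
    stream=True,
)
for line in resp.iter_lines():
    if line.startswith(b"data: "):
        print(json.loads(line[len(b"data: "):])["token"], end="", flush=True)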