Hjgugugjhuhjggg committed
Commit c7434cd · verified · 1 Parent(s): 67b4abe

Update app.py

Files changed (1):
  1. app.py +47 -35
app.py CHANGED
@@ -15,6 +15,10 @@ import asyncio
 import json
 import logging
 from huggingface_hub import login
+from dotenv import load_dotenv
+import huggingface_hub
+
+load_dotenv()
 
 GCS_BUCKET_NAME = os.getenv("GCS_BUCKET_NAME")
 GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
@@ -23,6 +27,9 @@ HUGGINGFACE_HUB_TOKEN = os.getenv("HF_API_TOKEN")
 if HUGGINGFACE_HUB_TOKEN:
     login(token=HUGGINGFACE_HUB_TOKEN)
 
+os.system("git config --global credential.helper store")
+huggingface_hub.login(token=HUGGINGFACE_HUB_TOKEN, add_to_git_credential=True)
+
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
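Note: with no token set, huggingface_hub.login falls back to an interactive prompt, so the unconditional call added here can hang or error on a headless Space where HF_API_TOKEN is unset. A minimal guard, a sketch reusing the names from the diff:

    # Sketch: only store the git credential and log in when a token is present.
    if HUGGINGFACE_HUB_TOKEN:
        os.system("git config --global credential.helper store")
        huggingface_hub.login(token=HUGGINGFACE_HUB_TOKEN, add_to_git_credential=True)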
 
@@ -71,7 +78,7 @@ class GCSModelLoader:
         self.bucket = bucket
 
     def _get_gcs_uri(self, model_name):
-        return f"{model_name}"
+        return f"{model_name}"
 
     def _blob_exists(self, blob_path):
         blob = self.bucket.blob(blob_path)
@@ -87,20 +94,27 @@ class GCSModelLoader:
         blob = self.bucket.blob(blob_path)
         blob.upload_from_string(content)
 
+    def _create_model_folder(self, model_name):
+        gcs_model_folder = self._get_gcs_uri(model_name)
+        if not self._blob_exists(f"{gcs_model_folder}/.touch"):
+            blob = self.bucket.blob(f"{gcs_model_folder}/.touch")
+            blob.upload_from_string("")
+            logger.info(f"Created folder '{gcs_model_folder}' in GCS.")
+
     def load_config(self, model_name):
-
         gcs_config_path = f"{self._get_gcs_uri(model_name)}/config.json"
         config_content = self._download_content(gcs_config_path)
         if config_content:
             try:
                 return AutoConfig.from_pretrained(pretrained_model_name_or_path=None, trust_remote_code=True, config_dict=json.loads(config_content), token=HUGGINGFACE_HUB_TOKEN)
             except Exception as e:
-                logger.error(f"Error loading config from GCS: {e}")
-                return None
+                logger.error(f"Error loading config from GCS: {e}")
+                return None
         else:
             try:
                 config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, token=HUGGINGFACE_HUB_TOKEN)
                 gcs_model_folder = self._get_gcs_uri(model_name)
+                self._create_model_folder(model_name)
                 self._upload_content(json.dumps(config.to_dict()).encode('utf-8'), f"{gcs_model_folder}/config.json")
                 return config
             except Exception as e:
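(GCS has no real directories, so the new _create_model_folder materializes the prefix by writing a zero-byte .touch object; that part is sound.) Note, however, that AutoConfig.from_pretrained in stock transformers accepts no config_dict keyword, so the GCS branch above most likely raises and returns None on every call. One way to rebuild a config from the stored JSON, a sketch assuming the dict still carries its model_type (config.to_dict() normally preserves it):

    import json
    from transformers import CONFIG_MAPPING

    def config_from_json(raw: bytes):
        # CONFIG_MAPPING maps a model_type string (e.g. "llama") to its config class;
        # from_dict() then rebuilds the config object from plain JSON data.
        data = json.loads(raw)
        return CONFIG_MAPPING[data["model_type"]].from_dict(data)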
@@ -114,7 +128,7 @@ class GCSModelLoader:
 
         if gcs_files_exist:
             try:
-                return AutoTokenizer.from_pretrained(gcs_tokenizer_path, trust_remote_code=True,token=HUGGINGFACE_HUB_TOKEN)
+                return AutoTokenizer.from_pretrained(gcs_tokenizer_path, trust_remote_code=True, token=HUGGINGFACE_HUB_TOKEN)
             except Exception as e:
                 logger.error(f"Error loading tokenizer from GCS: {e}")
                 return None
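Note: AutoTokenizer.from_pretrained resolves local paths or Hub repo ids, not GCS object names, so this branch only works if the files have already been copied to local disk. A sketch of that missing step, assuming the google-cloud-storage bucket object used elsewhere in this file:

    import os
    import tempfile

    def download_prefix_to_tmp(bucket, prefix):
        # Copy every object under "<prefix>/" into a temp dir and return its path,
        # skipping the zero-byte .touch placeholder.
        local_dir = tempfile.mkdtemp()
        for blob in bucket.list_blobs(prefix=f"{prefix}/"):
            filename = os.path.basename(blob.name)
            if filename and filename != ".touch":
                blob.download_to_filename(os.path.join(local_dir, filename))
        return local_dir

The tokenizer would then load via AutoTokenizer.from_pretrained(download_prefix_to_tmp(bucket, model_name), ...).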
@@ -122,9 +136,8 @@ class GCSModelLoader:
             try:
                 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=HUGGINGFACE_HUB_TOKEN)
                 gcs_model_folder = self._get_gcs_uri(model_name)
-                for filename in os.listdir(tokenizer.save_pretrained(None)):
-                    with open(filename, 'rb') as f:
-                        self._upload_content(f.read(), f"{gcs_model_folder}/{filename}")
+                self._create_model_folder(model_name)
+                tokenizer.save_pretrained(gcs_model_folder)
                 return tokenizer
             except Exception as e:
                 logger.error(f"Error loading tokenizer from Hugging Face and saving to GCS: {e}")
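Note: save_pretrained writes to the local filesystem, so passing the bare GCS folder name saves into a local directory of that name; nothing reaches the bucket without an explicit upload (the same applies to model.save_pretrained in the next hunk). A sketch of the missing upload, assuming google-cloud-storage's upload_from_filename:

    import os

    def save_and_upload(tokenizer, bucket, gcs_model_folder):
        # Save locally first, then push each produced file to the bucket.
        local_dir = os.path.join("/tmp", gcs_model_folder)
        tokenizer.save_pretrained(local_dir)
        for filename in os.listdir(local_dir):
            blob = bucket.blob(f"{gcs_model_folder}/{filename}")
            blob.upload_from_filename(os.path.join(local_dir, filename))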
@@ -145,9 +158,8 @@ class GCSModelLoader:
             try:
                 model = AutoModelForCausalLM.from_pretrained(model_name, config=config, trust_remote_code=True, token=HUGGINGFACE_HUB_TOKEN)
                 gcs_model_folder = self._get_gcs_uri(model_name)
-                for filename in os.listdir(model.save_pretrained(None)):
-                    with open(filename, 'rb') as f:
-                        self._upload_content(f.read(), f"{gcs_model_folder}/{filename}")
+                self._create_model_folder(model_name)
+                model.save_pretrained(gcs_model_folder)
                 return model
             except Exception as e:
                 logger.error(f"Error loading model from Hugging Face and saving to GCS: {e}")
@@ -157,19 +169,19 @@ model_loader = GCSModelLoader(bucket)
 
 async def generate_stream(model, tokenizer, input_text, generation_config):
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-    generation_stream = model.generate(
-        **inputs,
-        generation_config=generation_config,
-        stream=True,
-    )
+
     async def token_stream():
-        for output in generation_stream:
-            token_id = output[-1]
-            token = tokenizer.decode(token_id, skip_special_tokens=True)
-            yield {"token": token}
-            await asyncio.sleep(0.001)
-    return token_stream()
+        generation_stream = model.generate(
+            **inputs,
+            generation_config=generation_config,
+            stream=True,
+        )
+        async for output in generation_stream:
+            token_id = output[-1]
+            token = tokenizer.decode(token_id, skip_special_tokens=True)
+            yield {"token": token}
 
+    return token_stream()
 
 def generate_non_stream(model, tokenizer, input_text, generation_config):
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
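Note: stream=True is not a keyword of stock transformers generate(), and generate() does not return an async iterator, so the async for above only works against a custom or forked model class. The portable pattern drains a TextIteratorStreamer while generation runs in a worker thread; a sketch under that assumption:

    from threading import Thread
    from transformers import TextIteratorStreamer

    def stream_tokens(model, tokenizer, input_text, generation_config):
        # The streamer yields decoded text chunks as generate() produces tokens.
        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        worker = Thread(target=model.generate,
                        kwargs=dict(**inputs, generation_config=generation_config, streamer=streamer))
        worker.start()
        for text in streamer:  # blocks until the next decoded chunk arrives
            yield {"token": text}
        worker.join()

FastAPI's StreamingResponse accepts a plain generator like this one directly.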
@@ -191,7 +203,7 @@ async def generate(request: GenerateRequest):
     try:
         gcs_model_folder_uri = model_loader._get_gcs_uri(model_name)
         if not model_loader._blob_exists(f"{gcs_model_folder_uri}/config.json"):
-            logger.info(f"Model '{model_name}' not found in GCS, downloading from Hugging Face.")
+            logger.info(f"Model '{model_name}' not found in GCS, checking Hugging Face.")
 
         config = model_loader.load_config(model_name)
         if not config:
@@ -199,21 +211,17 @@ async def generate(request: GenerateRequest):
 
         tokenizer = model_loader.load_tokenizer(model_name)
         if not tokenizer:
-            raise HTTPException(status_code=400, detail="Tokenizer could not be loaded.")
+            raise HTTPException(status_code=400, detail="Tokenizer could not be loaded.")
 
         generation_config_kwargs = generation_params.copy()
-        if hasattr(tokenizer, 'pad_token_id') and tokenizer.pad_token_id is not None:
-            generation_config_kwargs['pad_token_id'] = tokenizer.pad_token_id
-        if hasattr(tokenizer, 'eos_token_id') and tokenizer.eos_token_id is not None:
-            generation_config_kwargs['eos_token_id'] = tokenizer.eos_token_id
-        if hasattr(tokenizer, 'sep_token_id') and tokenizer.sep_token_id is not None:
-            generation_config_kwargs['sep_token_id'] = tokenizer.sep_token_id
-        if hasattr(tokenizer, 'unk_token_id') and tokenizer.unk_token_id is not None:
-            generation_config_kwargs['unk_token_id'] = tokenizer.unk_token_id
-
+        generation_config_kwargs['pad_token_id'] = tokenizer.pad_token_id
+        generation_config_kwargs['eos_token_id'] = tokenizer.eos_token_id
+        generation_config_kwargs['sep_token_id'] = tokenizer.sep_token_id
+        generation_config_kwargs['unk_token_id'] = tokenizer.unk_token_id
+
         model = model_loader.load_model(model_name, config)
         if not model:
-            raise HTTPException(status_code=400, detail="Model could not be loaded.")
+            raise HTTPException(status_code=400, detail="Model could not be loaded.")
 
         generation_config = GenerationConfig.from_pretrained(
             model_name,
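Note: the deleted hasattr guards were load-bearing; many tokenizers report sep_token_id or unk_token_id as None, and writing None into these kwargs can clobber valid ids from the model's own generation config. A defensive version, a sketch reusing the names above:

    # Copy only the token ids the tokenizer actually defines.
    for key in ("pad_token_id", "eos_token_id", "sep_token_id", "unk_token_id"):
        value = getattr(tokenizer, key, None)
        if value is not None:
            generation_config_kwargs[key] = value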
@@ -223,7 +231,11 @@ async def generate(request: GenerateRequest):
 
         if task_type == "text-to-text":
             if stream:
-                return StreamingResponse(generate_stream(model, tokenizer, input_text, generation_config), media_type="text/event-stream")
+                async def event_stream():
+                    async for output in generate_stream(model, tokenizer, input_text, generation_config):
+                        yield f"data: {json.dumps(output)}\n\n"
+                        await asyncio.sleep(request.chunk_delay)
+                return StreamingResponse(event_stream(), media_type="text/event-stream")
             else:
                 text_result = generate_non_stream(model, tokenizer, input_text, generation_config)
                 return {"text": text_result}
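Two reference points for this branch. First, generate_stream is itself async, so event_stream needs await generate_stream(...) before iterating. Second, for testing, a client that reads the server-sent events; the URL, port, and payload fields here are assumptions inferred from the handler, sketched with httpx:

    import json
    import httpx

    # Hypothetical payload; field names follow the handler's GenerateRequest usage.
    payload = {"model_name": "gpt2", "input_text": "Hello", "task_type": "text-to-text", "stream": True}
    with httpx.stream("POST", "http://localhost:7860/generate", json=payload, timeout=None) as response:
        for line in response.iter_lines():
            if line.startswith("data: "):  # each SSE frame: data: {"token": "..."}
                print(json.loads(line[len("data: "):])["token"], end="", flush=True)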
 