Update app.py
app.py
CHANGED
@@ -49,9 +49,6 @@ class GenerateRequest(BaseModel):
     do_sample: bool = False
     chunk_delay: float = 0.1
     stop_sequences: list = []
-    min_length: int = 0
-    no_repeat_ngram_size: int = 0
-    length_penalty: float = 1.0
 
     @field_validator("model_name")
     def model_name_cannot_be_empty(cls, v):
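The three deleted request fields (min_length, no_repeat_ngram_size, length_penalty) are standard Hugging Face generation parameters; after this commit a request carries only the remaining knobs. A hypothetical payload under the trimmed schema — values are illustrative, and model_name/input_text are assumed from the handler further down, not shown in this hunk:

    # Hypothetical request body for the trimmed GenerateRequest schema;
    # all values are illustrative, not taken from the commit.
    payload = {
        "model_name": "gpt2",
        "input_text": "Once upon a time",
        "do_sample": False,
        "chunk_delay": 0.1,
        "stop_sequences": ["\n\n"],
    }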
@@ -73,68 +70,56 @@ class GCSModelLoader:
     def _get_gcs_uri(self, model_name):
         return f"{model_name}"
 
-    async def …
-        try:
-            blob = self.bucket.blob(gcs_path)
-            …
-            return await blob.download_as_string()
-            return None
-        except Exception as e:
-            logger.error(f"Error accessing {gcs_path}: {e}")
-            return None
-
-    async def _upload_to_gcs(self, content, gcs_path):
-        try:
-            blob = self.bucket.blob(gcs_path)
-            await blob.upload_from_string(content)
-            return True
-        except Exception as e:
-            logger.error(f"Error uploading to {gcs_path}: {e}")
-            return False
+    async def _blob_exists(self, blob_path):
+        blob = self.bucket.blob(blob_path)
+        return await blob.exists(client=self.bucket.client)
+
+    async def _download_string(self, blob_path):
+        blob = self.bucket.blob(blob_path)
+        if await self._blob_exists(blob_path):
+            return await blob.download_as_string()
+        return None
+
+    async def _upload_string(self, content, blob_path):
+        blob = self.bucket.blob(blob_path)
+        await blob.upload_from_string(content)
 
     async def load_config(self, model_name):
-        …
-        if …:
+        gcs_config_path = f"{self._get_gcs_uri(model_name)}/config.json"
+        config_str = await self._download_string(gcs_config_path)
+        if config_str:
             try:
-                return AutoConfig.from_pretrained(pretrained_model_name_or_path=None, trust_remote_code=True, …
+                return AutoConfig.from_pretrained(pretrained_model_name_or_path=None, trust_remote_code=True, **json.loads(config_str))
             except Exception as e:
                 logger.error(f"Error loading config from GCS: {e}")
+                return None
         else:
             try:
                 config = AutoConfig.from_pretrained(model_name, token=HUGGINGFACE_HUB_TOKEN, trust_remote_code=True)
-                await self.…
-                await self._upload_to_gcs(json.dumps(config.to_dict()), gcs_path)
+                await self._upload_string(json.dumps(config.to_dict()), gcs_config_path)
                 return config
-            except Exception as …
-                logger.error(f"Error loading config from Hugging Face and saving to GCS: {…
-                …
+            except Exception as e:
+                logger.error(f"Error loading config from Hugging Face and saving to GCS: {e}")
+                return None
 
     async def load_tokenizer(self, model_name):
-        …
+        gcs_tokenizer_path = self._get_gcs_uri(model_name)
+        if await self._blob_exists(f"{gcs_tokenizer_path}/tokenizer_config.json") and \
+           await self._blob_exists(f"{gcs_tokenizer_path}/vocab.json") and \
+           await self._blob_exists(f"{gcs_tokenizer_path}/merges.txt"):
             try:
-                return AutoTokenizer.from_pretrained(…
+                return AutoTokenizer.from_pretrained(gcs_tokenizer_path, trust_remote_code=True)
             except Exception as e:
                 logger.error(f"Error loading tokenizer from GCS: {e}")
+                return None
         else:
             try:
                 tokenizer = AutoTokenizer.from_pretrained(model_name, token=HUGGINGFACE_HUB_TOKEN, trust_remote_code=True)
-                …
+                tokenizer.save_pretrained(gcs_tokenizer_path)
                 return tokenizer
-            except Exception as …
-                logger.error(f"Error loading tokenizer from Hugging Face and saving to GCS: {…
-                …
+            except Exception as e:
+                logger.error(f"Error loading tokenizer from Hugging Face and saving to GCS: {e}")
+                return None
 
     async def load_model(self, model_name, config):
         gcs_model_path = self._get_gcs_uri(model_name)
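Two things worth noting about the new helpers. First, _get_gcs_uri returns the bare model name, so these paths are object names inside the already-configured bucket, not full gs:// URIs. Second, the helpers await blob.exists() and blob.download_as_string(), which are synchronous methods in the standard google-cloud-storage client, so the code presumably sits on top of an async-capable client. A minimal sketch of equivalent helpers that keep the stock synchronous client and push the blocking calls onto a thread — helper names mirror the diff, but this is an assumption, not the commit's code:

    import asyncio
    from google.cloud import storage

    async def blob_exists(bucket: storage.Bucket, blob_path: str) -> bool:
        # Blob.exists() is blocking in google-cloud-storage, so run it in a thread.
        blob = bucket.blob(blob_path)
        return await asyncio.to_thread(blob.exists)

    async def download_string(bucket: storage.Bucket, blob_path: str) -> str | None:
        # Return the object's contents as text, or None if it is absent.
        blob = bucket.blob(blob_path)
        if await asyncio.to_thread(blob.exists):
            return (await asyncio.to_thread(blob.download_as_bytes)).decode("utf-8")
        return None

    async def upload_string(bucket: storage.Bucket, content: str, blob_path: str) -> None:
        # Mirrors _upload_string above: write a string payload to the bucket.
        await asyncio.to_thread(bucket.blob(blob_path).upload_from_string, content)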
@@ -152,8 +137,8 @@ class GCSModelLoader:
             model = AutoModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN, trust_remote_code=True)
             model.save_pretrained(gcs_model_path)
             return model
-        except Exception as …
-            logger.error(f"Error loading model from Hugging Face and saving to GCS: {…
+        except Exception as e:
+            logger.error(f"Error loading model from Hugging Face and saving to GCS: {e}")
             raise HTTPException(status_code=500, detail="Failed to load model")
 
 model_loader = GCSModelLoader(bucket)
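One caveat on this caching path: model.save_pretrained(gcs_model_path) writes to a local directory (the bare model name returned by _get_gcs_uri), not to the bucket. If the intent is to cache the weights in GCS, a hypothetical follow-up would mirror the saved directory into the bucket, for example:

    import os
    from google.cloud import storage

    def upload_dir(bucket: storage.Bucket, local_dir: str, prefix: str) -> None:
        # Hypothetical helper (not in the commit): copy every file that
        # save_pretrained wrote into the bucket under the given prefix.
        for root, _, files in os.walk(local_dir):
            for name in files:
                local_path = os.path.join(root, name)
                rel = os.path.relpath(local_path, local_dir)
                bucket.blob(f"{prefix}/{rel}").upload_from_filename(local_path)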
@@ -170,13 +155,8 @@ async def generate_stream(model, tokenizer, input_text, generation_config, stop_…
             await asyncio.sleep(chunk_delay)
             if any(stop in token for stop in stop_sequences):
                 break
-        yield {"finish": True}
-
-    async def generate_events():
-        async for event_data in event_stream():
-            yield json.dumps(event_data) + "\n"
 
-    return …
+    return event_stream()
 
 async def generate_non_stream(model, tokenizer, input_text, generation_config):
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
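Since generate_stream is declared async def (per the hunk header), the new return event_stream() makes it a coroutine function that returns an async generator when awaited; the handler below then does async for event in generate_stream(...), which would need an await first, because a coroutine object is not itself async-iterable (unless generate_stream was changed to a plain def elsewhere in the file). A sketch of the two shapes that compose cleanly, with an illustrative body standing in for the real decoding loop:

    import asyncio

    # Shape 1: a plain def that returns the async generator directly, so
    # "async for event in generate_stream(...)" works as written below.
    def generate_stream(model, tokenizer, input_text, generation_config,
                        stop_sequences, chunk_delay):
        async def event_stream():
            for token in ["Hello", " world"]:  # stand-in for real token decoding
                yield {"token": token}
                await asyncio.sleep(chunk_delay)
                if any(stop in token for stop in stop_sequences):
                    break
        return event_stream()

    # Shape 2: keep async def, in which case the caller must unwrap it:
    #     async for event in await generate_stream(...):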
@@ -217,16 +197,12 @@ async def generate(request: GenerateRequest):
 
     if task_type == "text-to-text":
         if stream:
-            …
-                generate_stream(
-                …
-                media_type="text/event-stream"
-            )
+            async def generate_events():
+                async for event in generate_stream(model, tokenizer, input_text, generation_config, request.stop_sequences, request.chunk_delay):
+                    yield json.dumps(event).encode('utf-8') + b"\n"
+            return StreamingResponse(generate_events(), media_type="text/event-stream")
         else:
-            text_result = await generate_non_stream(
-                model, tokenizer, input_text, generation_config
-            )
+            text_result = await generate_non_stream(model, tokenizer, input_text, generation_config)
             return {"text": text_result}
     else:
         raise HTTPException(status_code=400, detail=f"Task type not supported: {task_type}")
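A hypothetical client for the streaming path, assuming the handler is mounted at /generate and that model_name, input_text, and stream are request fields (the commit shows the handler body but not the route or the full schema):

    import asyncio
    import json

    import httpx

    async def stream_generate() -> None:
        payload = {"model_name": "gpt2", "input_text": "Hello", "stream": True}
        async with httpx.AsyncClient(timeout=None) as client:
            # The endpoint emits one JSON object per line, so read line by line.
            async with client.stream("POST", "http://localhost:8000/generate", json=payload) as resp:
                async for line in resp.aiter_lines():
                    if line:
                        print(json.loads(line))

    asyncio.run(stream_generate())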