Update main.py

main.py CHANGED
@@ -1,13 +1,10 @@
+import os
+import requests
 from fastapi import FastAPI, Request, Depends
-from fastapi.responses import
+from fastapi.responses import JSONResponse
 from fastapi.staticfiles import StaticFiles
-from transformers import pipeline
 from pydantic import BaseModel
 from typing import Optional, Any
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer, GenerationConfig
-import os
-from huggingface_hub import login

 # Check whether we are executing inside a Hugging Face Space
 SPACE_NAME = os.getenv("SPACE_NAME", default=None)
@@ -26,28 +23,10 @@ except KeyError:
     print('The environment variable "HF_ACCESS_TOKEN" is not found. Please configure it correctly in your Space.')
     sys.exit(1)

-#
-
-
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
-
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_id,
-    quantization_config=bnb_config,
-    device_map="auto",
-    trust_remote_code=True,
-)
-
-# Tokenizer loading
-eval_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-hf", add_bos_token=True, trust_remote_code=True, use_auth_token=True)
-
-# Streamer
-streamer = TextStreamer(eval_tokenizer)
+# Set up the API endpoint and headers
+model_id = "152334H/miqu-1-70b-sf"
+endpoint = f"https://api-inference.huggingface.co/models/{model_id}"
+headers = {"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"}

 # App definition
 app = FastAPI()
@@ -56,22 +35,20 @@ app = FastAPI()
 async def parse_raw(request: Request):
     return await request.body()

-# Generate text
+# Generate text using the Inference API
 def generate_text(prompt: str) -> str:
-
-
-
-
-
-
-
-
-
-
-
-
-
-    return eval_tokenizer.decode(generated_sequences[0], skip_special_tokens=True)
+    data = {
+        "inputs": prompt,
+        "options": {
+            "max_new_tokens": 200,
+            "temperature": 0.7,
+            "top_p": 0.95,
+            "use_cache": False,
+        },
+    }
+
+    response = requests.post(endpoint, headers=headers, json=data)
+    return response.json()["generated_text"]

 # Route for generating text
 @app.post("/generate_text")
@@ -83,4 +60,4 @@ async def generate_text_route(data: BaseModel = Depends(parse_raw)):
     return {"output": generate_text(input_text)}

 # Mount static files
-app.mount("/static", StaticFiles(directory="static"), name="static")
+app.mount("/static", StaticFiles(directory="static"), name="static")
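A note on the committed generate_text: the serverless Inference API reads generation settings such as max_new_tokens, temperature, and top_p from a top-level "parameters" object, while "options" only carries API flags like use_cache and wait_for_model. A successful text-generation call also returns a JSON list of the form [{"generated_text": "..."}], so response.json()["generated_text"] will fail on the list. A minimal sketch of a variant that follows that contract, reusing the endpoint and headers defined above (the wait_for_model flag and the raise_for_status() check are assumptions added here, not part of the commit):

def generate_text(prompt: str) -> str:
    # Generation settings belong under "parameters"; "options" carries API flags.
    data = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 200,
            "temperature": 0.7,
            "top_p": 0.95,
        },
        # wait_for_model avoids an immediate 503 while the model is cold-loading.
        "options": {"use_cache": False, "wait_for_model": True},
    }
    response = requests.post(endpoint, headers=headers, json=data)
    response.raise_for_status()  # surface HTTP errors instead of a KeyError below
    # Text-generation responses arrive as [{"generated_text": "..."}].
    return response.json()[0]["generated_text"]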
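Once the Space is up, the new route can be exercised with a plain POST body. A sketch, assuming a hypothetical Space URL and that the elided part of generate_text_route decodes the raw body returned by parse_raw into input_text:

import requests

# Hypothetical URL; substitute the Space's real host.
url = "https://your-space.hf.space/generate_text"

resp = requests.post(url, data="Once upon a time")
print(resp.json()["output"])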