Ouiam123 committed on
Commit 9f4ea3c · verified · 1 Parent(s): ddc0658

Update app.py

Files changed (1)
  1. app.py +37 -29
app.py CHANGED
@@ -1,35 +1,43 @@
- import requests
  import os

- # Ensure the Hugging Face API token is set in your environment
- api_token = os.getenv("ttt")

- # Check if the token is available
- if api_token is None:
      print("API token is not set. Please set the 'HF_API_TOKEN' environment variable.")
      exit(1)

- # Set the authorization header with the token
- headers = {
-     "Authorization": f"Bearer {api_token}",
-     "Content-Type": "application/json"
- }
-
- # URL for the Hugging Face model inference (change this URL to your space's URL)
- model_url = "https://hf.space/embed/Ouiam123/Llama-2-7b-chat-finetune-tourism/+/api/predict"
-
- # Input text you want to send to the model
- input_text = "What should I do if I get lost in Morocco?"
-
- # Make a POST request to the Hugging Face model inference API
- response = requests.post(
-     model_url,
-     headers=headers,
-     json={"data": [input_text]}  # Input should be under the "data" field
- )
-
- # Check the response
- if response.status_code == 200:
-     print("Response:", response.json())  # Print the model's response
- else:
-     print(f"Error {response.status_code}: {response.text}")

  import os
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from dotenv import load_dotenv

+ # Load environment variables
+ load_dotenv()

+ # Get the Hugging Face API token
+ api_token = os.getenv("HF_API_TOKEN")
+ if not api_token:
      print("API token is not set. Please set the 'HF_API_TOKEN' environment variable.")
      exit(1)

+ # Hugging Face model
+ model_name = "Ouiam123/Llama-2-7b-chat-finetune-tourism"
+
+ # Check whether CUDA is available so the GPU can be used
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ try:
+     # Load the tokenizer
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+     # Load the model with 4-bit quantization
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         load_in_4bit=True,  # 4-bit quantization
+         device_map="auto"   # Automatically map the model onto GPU or CPU
+     )
+
+     # Prepare the input text
+     input_text = "What should I do if I get lost in Morocco?"
+     inputs = tokenizer(input_text, return_tensors="pt").to(device)
+
+     # Generate a response
+     outputs = model.generate(inputs["input_ids"], max_length=100, num_beams=5, early_stopping=True)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     print("Response:", response)
+
+ except Exception as e:
+     print(f"An error occurred: {e}")
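Note on the 4-bit load: passing load_in_4bit=True straight to from_pretrained only works when the bitsandbytes package is installed and a CUDA device is visible, and recent transformers releases route this option through a BitsAndBytesConfig instead. A minimal sketch of an equivalent load under those assumptions (the compute dtype and generation settings here are illustrative, not taken from this commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "Ouiam123/Llama-2-7b-chat-finetune-tourism"

# Assumed setup: bitsandbytes installed and a CUDA GPU available
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype chosen for illustration
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# Put the inputs on the same device as the model's first weights
inputs = tokenizer("What should I do if I get lost in Morocco?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))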