Ouiam123 committed on
Commit 9f4ea3c · verified · 1 Parent(s): ddc0658

Update app.py

Files changed (1)
  1. app.py +37 -29
app.py CHANGED
@@ -1,35 +1,43 @@
- import requests
  import os

- # Ensure the Hugging Face API token is set in your environment
- api_token = os.getenv("ttt")

- # Check if the token is available
- if api_token is None:
      print("API token is not set. Please set the 'HF_API_TOKEN' environment variable.")
      exit(1)

- # Set the authorization header with the token
- headers = {
-     "Authorization": f"Bearer {api_token}",
-     "Content-Type": "application/json"
- }
-
- # URL for the Hugging Face model inference (change this URL to your space's URL)
- model_url = "https://hf.space/embed/Ouiam123/Llama-2-7b-chat-finetune-tourism/+/api/predict"
-
- # Input text you want to send to the model
- input_text = "What should I do if I get lost in Morocco?"
-
- # Make a POST request to the Hugging Face model inference API
- response = requests.post(
-     model_url,
-     headers=headers,
-     json={"data": [input_text]}  # Input should be under the "data" field
- )
-
- # Check the response
- if response.status_code == 200:
-     print("Response:", response.json())  # Print the model's response
- else:
-     print(f"Error {response.status_code}: {response.text}")

  import os
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from dotenv import load_dotenv

+ # Load environment variables
+ load_dotenv()

+ # Get the Hugging Face API token
+ api_token = os.getenv("HF_API_TOKEN")
+ if not api_token:
      print("API token is not set. Please set the 'HF_API_TOKEN' environment variable.")
      exit(1)

+ # Hugging Face model
+ model_name = "Ouiam123/Llama-2-7b-chat-finetune-tourism"
+
+ # Check whether CUDA is available so the GPU can be used
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ try:
+     # Load the tokenizer
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+     # Load the model with 4-bit quantization
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         load_in_4bit=True,  # 4-bit quantization
+         device_map="auto"   # Automatically map the model onto GPU or CPU
+     )
+
+     # Prepare the input text
+     input_text = "What should I do if I get lost in Morocco?"
+     inputs = tokenizer(input_text, return_tensors="pt").to(device)
+
+     # Generate a response
+     outputs = model.generate(inputs["input_ids"], max_length=100, num_beams=5, early_stopping=True)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     print("Response:", response)
+
+ except Exception as e:
+     print(f"An error occurred: {e}")
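Note on the 4-bit load: passing load_in_4bit=True straight to from_pretrained only works when the bitsandbytes package is installed and a CUDA device is visible, and recent transformers releases route this option through a BitsAndBytesConfig instead. A minimal sketch of an equivalent load under those assumptions (the compute dtype and generation settings here are illustrative, not taken from this commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "Ouiam123/Llama-2-7b-chat-finetune-tourism"

# Assumed setup: bitsandbytes installed and a CUDA GPU available
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype chosen for illustration
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# Put the inputs on the same device as the model's first weights
inputs = tokenizer("What should I do if I get lost in Morocco?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))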