Ouiam123 committed on
Commit
303b9b0
·
verified ·
1 Parent(s): 4ec44a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -8
app.py CHANGED
@@ -1,21 +1,43 @@
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  from huggingface_hub import login
 
4
  import os
5
 
6
  # Authenticate with Hugging Face
7
- HF_API_TOKEN = os.getenv("ttt")
 
 
8
  login(HF_API_TOKEN)
9
 
10
- # Load your model
11
  model_name = "Ouiam123/Llama-2-7b-chat-finetune-tourism"
12
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HF_API_TOKEN)
13
- model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=HF_API_TOKEN)
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Define the response generation function
16
  def generate_response(prompt):
17
- inputs = tokenizer(prompt, return_tensors="pt")
18
- outputs = model.generate(**inputs, max_new_tokens=100)
 
 
 
 
 
 
19
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
20
 
21
  # Create a Gradio interface
@@ -23,8 +45,10 @@ interface = gr.Interface(
23
  fn=generate_response,
24
  inputs="text",
25
  outputs="text",
26
- title="Moroccan Tourism Chatbot"
 
27
  )
28
 
29
  # Launch the app
30
- interface.launch()
 
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  from huggingface_hub import login
4
+ import torch
5
  import os
6
 
7
# Authenticate with Hugging Face.
# Fail fast with a clear error when the token env var is missing,
# instead of letting `login(None)` produce a confusing failure later.
if (HF_API_TOKEN := os.getenv("ttt")) is None:
    raise ValueError("Hugging Face API token ('ttt') not set in environment variables.")
login(HF_API_TOKEN)
12
 
13
+ # Load your model with memory optimization
14
  model_name = "Ouiam123/Llama-2-7b-chat-finetune-tourism"
15
+
16
+ # Check for GPU availability
17
+ use_gpu = torch.cuda.is_available()
18
+
19
+ try:
20
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HF_API_TOKEN)
21
+ model = AutoModelForCausalLM.from_pretrained(
22
+ model_name,
23
+ device_map="auto", # Automatically map layers to devices (CPU/GPU)
24
+ offload_folder="./offload", # Offload parts of the model to disk if needed
25
+ load_in_8bit=use_gpu, # Enable 8-bit precision for GPU
26
+ use_auth_token=HF_API_TOKEN,
27
+ )
28
+ except Exception as e:
29
+ raise RuntimeError(f"Failed to load the model: {e}")
30
 
31
# Define the response generation function
def generate_response(prompt):
    """Generate a chatbot reply for *prompt* with the fine-tuned model.

    The prompt is tokenized with truncation at 512 tokens to bound memory
    use, and generation is capped at 100 new tokens. Returns the decoded
    output sequence (presumably prompt + continuation for a causal LM —
    TODO confirm whether the prompt should be stripped for display).
    """
    # Preprocess input to optimize memory (hard truncation at 512 tokens).
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

    # Move inputs to the GPU when one was detected at startup.
    if use_gpu:
        inputs = {key: value.to("cuda") for key, value in inputs.items()}

    # Inference only: disable autograd bookkeeping — identical output,
    # noticeably less memory for a 7B model.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            pad_token_id=tokenizer.eos_token_id,  # silence missing-pad warning for Llama
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
42
 
43
# Create a Gradio interface: plain text in, plain text out.
_ui_config = dict(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Optimized Moroccan Tourism Chatbot",
    description="Ask any questions about tourism in Morocco!",
)
interface = gr.Interface(**_ui_config)
51
 
52
  # Launch the app
53
+ if __name__ == "__main__":
54
+ interface.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))