Pavan178 committed
Commit 7ea4acb · verified · 1 Parent(s): 1d96682

Update app.py

Files changed (1):
  1. app.py +19 -2
app.py CHANGED
@@ -60,11 +60,26 @@ def create_db(splits, collection_name):
 
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     progress(0.1, desc="Initializing HF tokenizer...")
-    tokenizer = AutoTokenizer.from_pretrained(llm_model)
+
+    # Retrieve the Hugging Face token from environment variables
+    hf_token = os.environ.get("HF_TOKEN")
+    if not hf_token:
+        raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
+
+    # Log in to Hugging Face
+    login(token=hf_token)
+
+    # Initialize tokenizer and model with the token
+    tokenizer = AutoTokenizer.from_pretrained(llm_model, use_auth_token=hf_token)
 
     progress(0.3, desc="Loading model...")
     try:
-        model = AutoModelForCausalLM.from_pretrained(llm_model, torch_dtype=torch.float16, device_map="auto")
+        model = AutoModelForCausalLM.from_pretrained(
+            llm_model,
+            use_auth_token=hf_token,
+            torch_dtype=torch.float16,
+            device_map="auto"
+        )
     except RuntimeError as e:
         if "CUDA out of memory" in str(e):
             raise gr.Error("GPU memory exceeded. Try a smaller model or reduce batch size.")
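
A side note on the API used above: huggingface_hub's login() caches the token locally, so later from_pretrained() calls can omit it, and recent transformers releases deprecate the use_auth_token= keyword in favor of token=. A minimal sketch of the same flow on a current stack (the model name is an illustrative placeholder, not taken from this repo):

    import os

    import torch
    from huggingface_hub import login
    from transformers import AutoModelForCausalLM, AutoTokenizer

    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("Set the HF_TOKEN environment variable first.")

    login(token=hf_token)  # caches the token for subsequent Hub calls

    model_name = "meta-llama/Llama-2-7b-chat-hf"  # placeholder gated model
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        token=hf_token,             # replaces the deprecated use_auth_token=
        torch_dtype=torch.float16,  # halves memory versus float32 weights
        device_map="auto",          # needs accelerate; spreads layers across devices
    )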
 
@@ -85,6 +100,8 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
         eos_token_id=tokenizer.eos_token_id
     )
     llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': temperature})
+
+
 
     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
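
The hunk ends at the memory setup, so what follows is an assumption rather than this app's code: in a Gradio RAG app of this shape, the HuggingFacePipeline LLM, the vector store, and the buffer memory are typically wired together with LangChain's ConversationalRetrievalChain, roughly as in this sketch (llm and vector_db stand in for the objects built above):

    from langchain.chains import ConversationalRetrievalChain
    from langchain.memory import ConversationBufferMemory

    memory = ConversationBufferMemory(
        memory_key="chat_history",  # key under which past turns are stored
        return_messages=True,       # keep history as message objects, not one string
    )

    # Assumed wiring: llm is the HuggingFacePipeline from the diff and
    # vector_db is the store returned by create_db().
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_db.as_retriever(),
        memory=memory,
    )
    result = qa_chain({"question": "What does the document say about X?"})
    # result["answer"] holds the model's reply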