BramLeo committed
Commit b1248ec · verified · 1 parent: e6cb93d

Update app.py

Files changed (1):
  1. app.py (+29 −19)
app.py CHANGED
@@ -1,11 +1,11 @@
 import gradio as gr
 import gspread
+import torch
 from oauth2client.service_account import ServiceAccountCredentials
-from llama_cpp import Llama
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from llama_index.core import VectorStoreIndex, Settings
 from llama_index.core.node_parser import SentenceSplitter
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.llms.llama_cpp import LlamaCPP
 from huggingface_hub import hf_hub_download
 from llama_index.core.llms import ChatMessage
 from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
@@ -45,18 +45,23 @@ def read_google_sheets():
         return f"❌ ERROR: {str(e)}"
 
 # ===================================
-# 2️⃣ Initialize the Llama Model
+# 2️⃣ Initialize the Llama Model on the GPU
 # ===================================
 def initialize_llama_model():
-    model_path = hf_hub_download("TheBLoke/zephyr-7b-beta-GGUF", "zephyr-7b-beta.Q4_K_M.gguf")
-    print(model_path)
-    return model_path
+    model_name = "HuggingFaceH4/zephyr-7b-beta"  # Make sure this model is compatible with transformers
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,  # Use float16 to save memory
+        device_map="auto"  # Run the model on the GPU if available
+    )
+    return tokenizer, model
 
 # ===================================
 # 3️⃣ Initialize Model Settings
 # ===================================
-def initialize_settings(model_path):
-    Settings.llm = LlamaCPP(model_path=model_path, temperature=0.7)
+def initialize_settings(tokenizer, model):
+    Settings.llm = model  # No longer using LlamaCPP
 
 # ===================================
 # 4️⃣ Initialize Index & Chat Engine
@@ -93,7 +98,7 @@ def clean_response(response):
 # ===================================
 # 6️⃣ Function to Generate the Chatbot Response
 # ===================================
-def generate_response(message, history, chat_engine):
+def generate_response(message, history, tokenizer, model):
     if history is None:
         history = []
 
@@ -112,30 +117,35 @@ def generate_response(message, history, chat_engine):
         ),
     ]
 
-    response = chat_engine.stream_chat(message)
-    cleaned_text = clean_response(response)  # 🔹 Use the clean_response() function
+    # Tokenize the input
+    inputs = tokenizer(message, return_tensors="pt").to("cuda")
+
+    # Generate the response
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_length=512)
 
-    history.append((message, cleaned_text))  # 🔹 Ensure only text goes into the history
-    return cleaned_text
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    history.append((message, response))
+    return response
 
 # ===================================
 # 7️⃣ Main Function to Run the Application
 # ===================================
 def main():
-    model_path = initialize_llama_model()
-    initialize_settings(model_path)
+    tokenizer, model = initialize_llama_model()
+    initialize_settings(tokenizer, model)
 
     index = initialize_index()
     chat_engine = initialize_chat_engine(index)
 
     def chatbot_response(message, history=None):
-        return generate_response(message, history, chat_engine)
+        return generate_response(message, history, tokenizer, model)
 
     gr.Interface(
         fn=chatbot_response,
-        inputs="text",
-        outputs="text",
-    ).launch(server_port=8000)
+        inputs=["text"],
+        outputs=["text"],
+    ).launch()
 
 if __name__ == "__main__":
     main()
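
One caveat in the new settings step: `Settings.llm = model` stores a raw `transformers` model, but LlamaIndex resolves `Settings.llm` into one of its own LLM wrappers, so the condense-plus-context chat engine will most likely reject it at query time. A minimal sketch of keeping the chat engine working by wrapping the already-loaded model in `HuggingFaceLLM` (from the separate `llama-index-llms-huggingface` package; treat the exact parameter set here as an assumption):

```python
# Sketch: wrap the transformers model in a LlamaIndex LLM so the chat engine
# can call it. Assumes llama-index-llms-huggingface is installed; hypothetical
# rewrite of initialize_settings().
from llama_index.core import Settings
from llama_index.llms.huggingface import HuggingFaceLLM

def initialize_settings(tokenizer, model):
    Settings.llm = HuggingFaceLLM(
        model=model,          # reuse the model from initialize_llama_model()
        tokenizer=tokenizer,  # reuse its tokenizer
        max_new_tokens=512,
        generate_kwargs={"temperature": 0.7},  # matches the old LlamaCPP setting
    )
```

This would also keep a single in-memory copy of the weights, since the wrapper reuses the objects returned by `initialize_llama_model()` rather than loading the model again.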
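
Separately, `.to("cuda")` in `generate_response()` hard-codes a GPU and will raise on a CPU-only Space, even though `device_map="auto"` was chosen to handle both cases; and `max_length=512` counts the prompt tokens, so long prompts leave little room for the answer. A device-agnostic sketch:

```python
# Sketch: send inputs to whatever device device_map="auto" placed the model on,
# rather than assuming CUDA is present.
inputs = tokenizer(message, return_tensors="pt").to(model.device)

with torch.no_grad():
    # max_new_tokens bounds only the generated continuation;
    # max_length=512 would include the prompt in the budget.
    outputs = model.generate(**inputs, max_new_tokens=512)
```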
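
Finally, zephyr-7b-beta is a chat-tuned model, so tokenizing the bare message skips its chat template, and `tokenizer.decode(outputs[0], ...)` returns the prompt echoed back along with the reply. A sketch using `apply_chat_template` and decoding only the newly generated tokens:

```python
# Sketch: format the message with the model's chat template, then decode
# only the new tokens so `response` holds just the assistant's reply.
messages = [{"role": "user", "content": message}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

with torch.no_grad():
    outputs = model.generate(input_ids, max_new_tokens=512)

# Slice off the prompt tokens before decoding.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
```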