BramLeo committed
Commit b1248ec · verified · 1 parent: e6cb93d

Update app.py

Files changed (1):
  1. app.py (+29 −19)
app.py CHANGED
@@ -1,11 +1,11 @@
 import gradio as gr
 import gspread
+import torch
 from oauth2client.service_account import ServiceAccountCredentials
-from llama_cpp import Llama
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from llama_index.core import VectorStoreIndex, Settings
 from llama_index.core.node_parser import SentenceSplitter
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.llms.llama_cpp import LlamaCPP
 from huggingface_hub import hf_hub_download
 from llama_index.core.llms import ChatMessage
 from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
@@ -45,18 +45,23 @@ def read_google_sheets():
         return f"❌ ERROR: {str(e)}"
 
 # ===================================
-# 2️⃣ Initialize the Llama Model
+# 2️⃣ Initialize the Llama Model on the GPU
 # ===================================
 def initialize_llama_model():
-    model_path = hf_hub_download("TheBLoke/zephyr-7b-beta-GGUF", "zephyr-7b-beta.Q4_K_M.gguf")
-    print(model_path)
-    return model_path
+    model_name = "HuggingFaceH4/zephyr-7b-beta"  # Make sure this model is compatible with transformers
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,  # Use float16 to save memory
+        device_map="auto"  # Run the model on the GPU if available
+    )
+    return tokenizer, model
 
 # ===================================
 # 3️⃣ Initialize Model Settings
 # ===================================
-def initialize_settings(model_path):
-    Settings.llm = LlamaCPP(model_path=model_path, temperature=0.7)
+def initialize_settings(tokenizer, model):
+    Settings.llm = model  # No longer using LlamaCPP
 
 # ===================================
 # 4️⃣ Initialize Index & Chat Engine
@@ -93,7 +98,7 @@ def clean_response(response):
 # ===================================
 # 6️⃣ Function to Generate the Chatbot Response
 # ===================================
-def generate_response(message, history, chat_engine):
+def generate_response(message, history, tokenizer, model):
     if history is None:
         history = []
 
@@ -112,30 +117,35 @@ def generate_response(message, history, chat_engine):
         ),
     ]
 
-    response = chat_engine.stream_chat(message)
-    cleaned_text = clean_response(response)  # 🔹 Use the clean_response() function
+    # Tokenize the input
+    inputs = tokenizer(message, return_tensors="pt").to("cuda")
+
+    # Generate the response
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_length=512)
 
-    history.append((message, cleaned_text))  # 🔹 Ensure only text goes into the history
-    return cleaned_text
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    history.append((message, response))
+    return response
 
 # ===================================
 # 7️⃣ Main Function to Run the Application
 # ===================================
 def main():
-    model_path = initialize_llama_model()
-    initialize_settings(model_path)
+    tokenizer, model = initialize_llama_model()
+    initialize_settings(tokenizer, model)
 
     index = initialize_index()
     chat_engine = initialize_chat_engine(index)
 
     def chatbot_response(message, history=None):
-        return generate_response(message, history, chat_engine)
+        return generate_response(message, history, tokenizer, model)
 
     gr.Interface(
         fn=chatbot_response,
-        inputs="text",
-        outputs="text",
-    ).launch(server_port=8000)
+        inputs=["text"],
+        outputs=["text"],
+    ).launch()
 
 if __name__ == "__main__":
     main()
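
One caveat in the new settings step: `Settings.llm = model` stores a raw `transformers` model, but LlamaIndex resolves `Settings.llm` into one of its own LLM wrappers, so the condense-plus-context chat engine will most likely reject it at query time. A minimal sketch of keeping the chat engine working by wrapping the already-loaded model in `HuggingFaceLLM` (from the separate `llama-index-llms-huggingface` package; treat the exact parameter set here as an assumption):

```python
# Sketch: wrap the transformers model in a LlamaIndex LLM so the chat engine
# can call it. Assumes llama-index-llms-huggingface is installed; hypothetical
# rewrite of initialize_settings().
from llama_index.core import Settings
from llama_index.llms.huggingface import HuggingFaceLLM

def initialize_settings(tokenizer, model):
    Settings.llm = HuggingFaceLLM(
        model=model,          # reuse the model from initialize_llama_model()
        tokenizer=tokenizer,  # reuse its tokenizer
        max_new_tokens=512,
        generate_kwargs={"temperature": 0.7},  # matches the old LlamaCPP setting
    )
```

This would also keep a single in-memory copy of the weights, since the wrapper reuses the objects returned by `initialize_llama_model()` rather than loading the model again.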
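
Separately, `.to("cuda")` in `generate_response()` hard-codes a GPU and will raise on a CPU-only Space, even though `device_map="auto"` was chosen to handle both cases; and `max_length=512` counts the prompt tokens, so long prompts leave little room for the answer. A device-agnostic sketch:

```python
# Sketch: send inputs to whatever device device_map="auto" placed the model on,
# rather than assuming CUDA is present.
inputs = tokenizer(message, return_tensors="pt").to(model.device)

with torch.no_grad():
    # max_new_tokens bounds only the generated continuation;
    # max_length=512 would include the prompt in the budget.
    outputs = model.generate(**inputs, max_new_tokens=512)
```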
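
Finally, zephyr-7b-beta is a chat-tuned model, so tokenizing the bare message skips its chat template, and `tokenizer.decode(outputs[0], ...)` returns the prompt echoed back along with the reply. A sketch using `apply_chat_template` and decoding only the newly generated tokens:

```python
# Sketch: format the message with the model's chat template, then decode
# only the new tokens so `response` holds just the assistant's reply.
messages = [{"role": "user", "content": message}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

with torch.no_grad():
    outputs = model.generate(input_ids, max_new_tokens=512)

# Slice off the prompt tokens before decoding.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
```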