Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
import gradio as gr
|
2 |
import gspread
|
|
|
3 |
from oauth2client.service_account import ServiceAccountCredentials
|
4 |
-
from
|
5 |
from llama_index.core import VectorStoreIndex, Settings
|
6 |
from llama_index.core.node_parser import SentenceSplitter
|
7 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
8 |
-
from llama_index.llms.llama_cpp import LlamaCPP
|
9 |
from huggingface_hub import hf_hub_download
|
10 |
from llama_index.core.llms import ChatMessage
|
11 |
from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
|
@@ -45,18 +45,23 @@ def read_google_sheets():
|
|
45 |
return f"❌ ERROR: {str(e)}"
|
46 |
|
47 |
# ===================================
|
48 |
-
# 2️⃣ Inisialisasi Model Llama
|
49 |
# ===================================
|
50 |
def initialize_llama_model():
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
# ===================================
|
56 |
# 3️⃣ Inisialisasi Pengaturan Model
|
57 |
# ===================================
|
58 |
-
def initialize_settings(
|
59 |
-
Settings.llm = LlamaCPP
|
60 |
|
61 |
# ===================================
|
62 |
# 4️⃣ Inisialisasi Index & Chat Engine
|
@@ -93,7 +98,7 @@ def clean_response(response):
|
|
93 |
# ===================================
|
94 |
# 6️⃣ Fungsi untuk Menghasilkan Respons Chatbot
|
95 |
# ===================================
|
96 |
-
def generate_response(message, history,
|
97 |
if history is None:
|
98 |
history = []
|
99 |
|
@@ -112,30 +117,35 @@ def generate_response(message, history, chat_engine):
|
|
112 |
),
|
113 |
]
|
114 |
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
117 |
|
118 |
-
|
119 |
-
|
|
|
120 |
|
121 |
# ===================================
|
122 |
# 7️⃣ Fungsi Utama untuk Menjalankan Aplikasi
|
123 |
# ===================================
|
124 |
def main():
|
125 |
-
|
126 |
-
initialize_settings(
|
127 |
|
128 |
index = initialize_index()
|
129 |
chat_engine = initialize_chat_engine(index)
|
130 |
|
131 |
def chatbot_response(message, history=None):
|
132 |
-
return generate_response(message, history,
|
133 |
|
134 |
gr.Interface(
|
135 |
fn=chatbot_response,
|
136 |
-
inputs="text",
|
137 |
-
outputs="text",
|
138 |
-
).launch(
|
139 |
|
140 |
if __name__ == "__main__":
|
141 |
main()
|
|
|
1 |
import gradio as gr
|
2 |
import gspread
|
3 |
+
import torch
|
4 |
from oauth2client.service_account import ServiceAccountCredentials
|
5 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
6 |
from llama_index.core import VectorStoreIndex, Settings
|
7 |
from llama_index.core.node_parser import SentenceSplitter
|
8 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
|
|
9 |
from huggingface_hub import hf_hub_download
|
10 |
from llama_index.core.llms import ChatMessage
|
11 |
from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
|
|
|
45 |
return f"❌ ERROR: {str(e)}"
|
46 |
|
47 |
# ===================================
|
48 |
+
# 2️⃣ Inisialisasi Model Llama di GPU
|
49 |
# ===================================
|
50 |
def initialize_llama_model():
    """Load the Zephyr-7B causal language model together with its tokenizer.

    Returns:
        A ``(tokenizer, model)`` tuple, in that order.
    """
    # The checkpoint has to be loadable through the transformers Auto* classes.
    checkpoint = "HuggingFaceH4/zephyr-7b-beta"

    load_options = {
        "torch_dtype": torch.float16,  # half precision to save memory
        "device_map": "auto",  # run the model on the GPU when one is available
    }

    hf_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    hf_model = AutoModelForCausalLM.from_pretrained(checkpoint, **load_options)
    return hf_tokenizer, hf_model
|
59 |
|
60 |
# ===================================
|
61 |
# 3️⃣ Inisialisasi Pengaturan Model
|
62 |
# ===================================
|
63 |
+
def initialize_settings(tokenizer, model):
    """Register the loaded Hugging Face model as the global LlamaIndex LLM.

    ``Settings.llm`` expects a LlamaIndex ``LLM`` object. A raw transformers
    model is not one, so assigning it directly fails when LlamaIndex resolves
    the setting. The model and tokenizer are therefore wrapped in
    ``HuggingFaceLLM`` before being assigned.

    Args:
        tokenizer: The transformers tokenizer that belongs to ``model``.
        model: The already-loaded transformers causal language model
            (LlamaCPP is no longer used).
    """
    # Function-scope import so the module's import block stays untouched.
    # NOTE(review): needs the llama-index-llms-huggingface package installed.
    from llama_index.llms.huggingface import HuggingFaceLLM

    Settings.llm = HuggingFaceLLM(model=model, tokenizer=tokenizer)
|
65 |
|
66 |
# ===================================
|
67 |
# 4️⃣ Inisialisasi Index & Chat Engine
|
|
|
98 |
# ===================================
|
99 |
# 6️⃣ Fungsi untuk Menghasilkan Respons Chatbot
|
100 |
# ===================================
|
101 |
+
def generate_response(message, history, tokenizer, model):
|
102 |
if history is None:
|
103 |
history = []
|
104 |
|
|
|
117 |
),
|
118 |
]
|
119 |
|
120 |
+
# Tokenisasi input
|
121 |
+
inputs = tokenizer(message, return_tensors="pt").to("cuda")
|
122 |
+
|
123 |
+
# Hasilkan respons
|
124 |
+
with torch.no_grad():
|
125 |
+
outputs = model.generate(**inputs, max_length=512)
|
126 |
|
127 |
+
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
128 |
+
history.append((message, response))
|
129 |
+
return response
|
130 |
|
131 |
# ===================================
|
132 |
# 7️⃣ Fungsi Utama untuk Menjalankan Aplikasi
|
133 |
# ===================================
|
134 |
def main():
    """Load the model, build the index and chat engine, then serve the Gradio UI."""
    tokenizer, model = initialize_llama_model()
    initialize_settings(tokenizer, model)

    # NOTE(review): the chat engine is still built here, but nothing below
    # uses it -- responses come straight from the tokenizer/model pair.
    chat_engine = initialize_chat_engine(initialize_index())

    # Adapter handed to Gradio: binds the loaded tokenizer and model so the
    # UI callback only has to supply the message (and an optional history).
    def chatbot_response(message, history=None):
        return generate_response(message, history, tokenizer, model)

    demo = gr.Interface(fn=chatbot_response, inputs=["text"], outputs=["text"])
    demo.launch()


if __name__ == "__main__":
    main()
|