# NOTE: removed non-code extraction artifacts (file-size header, git-blame
# hashes, and a line-number gutter) that were not part of the Python source.
import gradio as gr
import gspread
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from oauth2client.service_account import ServiceAccountCredentials
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.schema import Document
# =============== 1. Cache and Google Sheets Index Initialization ===============
# Module-level cache for the vector index; None until initialize_index() runs.
# Read by search_google_sheets_vector(), which lazily builds it on first use.
cached_index = None
def read_google_sheets():
    """Fetch every configured worksheet of the HR spreadsheet as one text blob.

    Each worksheet is rendered as a header line followed by its rows joined
    with " | ". A missing worksheet adds an error line instead of aborting.
    Never raises: any other failure is returned as an error string.
    """
    try:
        scopes = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive",
        ]
        credentials = ServiceAccountCredentials.from_json_keyfile_name(
            "credentials.json", scopes
        )
        gc = gspread.authorize(credentials)

        SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
        worksheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur", "pkb"]

        spreadsheet = gc.open_by_key(SPREADSHEET_ID)
        lines = []
        for name in worksheet_names:
            try:
                rows = spreadsheet.worksheet(name).get_all_values()
            except gspread.exceptions.WorksheetNotFound:
                lines.append(f"β ERROR: Worksheet {name} tidak ditemukan.")
                continue
            lines.append(f"=== Data dari {name.upper()} ===")
            lines.extend(" | ".join(row) for row in rows)
            lines.append("\n")
        return "\n".join(lines).strip()
    except Exception as e:
        return f"β ERROR: {str(e)}"
def initialize_index():
    """Build the global vector index from the Google Sheets text dump.

    Splits the sheet text into overlapping 100-token chunks, installs a
    multilingual MiniLM embedding model, and stores the resulting
    VectorStoreIndex in the module-level ``cached_index``.
    """
    global cached_index
    sheet_text = read_google_sheets()
    doc = Document(text=sheet_text)
    splitter = SentenceSplitter(chunk_size=100, chunk_overlap=30)
    chunks = splitter.get_nodes_from_documents([doc])
    Settings.embed_model = HuggingFaceEmbedding(
        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    cached_index = VectorStoreIndex(chunks)
def search_google_sheets_vector(query):
    """Return the 3 most relevant indexed chunks for *query*, newline-joined.

    Lazily builds the index on first use; falls back to an apology message
    when retrieval yields nothing.
    """
    if cached_index is None:
        initialize_index()
    hits = cached_index.as_retriever(similarity_top_k=3).retrieve(query)
    texts = [hit.text for hit in hits]
    if not texts:
        return "Maaf, saya tidak menemukan informasi yang relevan."
    return "\n".join(texts)
# =============== 2. Load Model Transformers ===============
def load_model():
    """Load zephyr-7b-beta (fp16, auto device placement) as a text-generation pipeline.

    Returns a ``transformers`` pipeline configured for sampled generation
    (temperature 0.7, repetition penalty 1.2, up to 512 new tokens).
    """
    model_id = "HuggingFaceH4/zephyr-7b-beta"
    tok = AutoTokenizer.from_pretrained(model_id)
    lm = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16,
    )
    return pipeline(
        "text-generation",
        model=lm,
        tokenizer=tok,
        max_new_tokens=512,
        temperature=0.7,
        repetition_penalty=1.2,
        do_sample=True,
    )
# =============== 3. Buat Prompt dan Jawaban ===============
def generate_prompt(user_message, context_data):
    """Assemble the chat prompt: system rules, retrieved data, then the question.

    Sections are delimited with ``### SISTEM/DATA/PERTANYAAN/JAWABAN`` markers;
    the model's answer is expected after the final ``### JAWABAN:`` marker.
    """
    sections = [
        "### SISTEM:",
        "Anda adalah chatbot HRD yang membantu karyawan memahami administrasi perusahaan. Jangan menjawab menggunakan Bahasa Inggris. Gunakan Bahasa Indonesia dengan gaya profesional dan ramah. Jika informasi tidak tersedia dalam dokumen, katakan dengan sopan bahwa Anda tidak tahu. Jawaban harus singkat, jelas, dan sesuai konteks. Jangan memberikan jawaban untuk pertanyaan yang tidak diajukan oleh pengguna. Jangan menyertakan rekomendasi pertanyaan lain.",
        "### DATA:",
        context_data,
        "### PERTANYAAN:",
        user_message,
        "### JAWABAN:",
    ]
    return "\n".join(sections).strip()
# =============== 4. Generate Response ===============
def generate_response(message, history, pipe):
    """Answer *message* using vector-retrieved sheet context and *pipe*.

    Retrieves context, builds the prompt, generates text, and keeps only what
    follows the final ``### JAWABAN:`` marker. The (message, answer) turn is
    appended to *history* in place when a non-empty list is supplied.
    Returns the cleaned answer string.
    """
    context = search_google_sheets_vector(message)  # vector-based retrieval
    prompt = generate_prompt(message, context)
    raw = pipe(prompt)[0]["generated_text"]
    answer = raw.split("### JAWABAN:")[-1].strip()
    if not history:
        history = []
    history.append((message, answer))
    return answer
# =============== 5. Jalankan Gradio ===============
def main():
    """Load the model, build the vector index, and launch the Gradio UI."""
    pipe = load_model()
    initialize_index()  # build the index once, before serving requests

    # BUG FIX: gr.Interface with a single "text" input calls fn with exactly
    # one positional argument, but the old chatbot_response(message, history)
    # required two -> TypeError on every submission. Give `history` a default
    # so the single-argument call from gr.Interface works, while remaining
    # callable with an explicit history.
    def chatbot_response(message, history=None):
        return generate_response(message, history or [], pipe)

    gr.Interface(
        fn=chatbot_response,
        inputs=["text"],
        outputs=["text"],
        title="Chatbot HRD - Transformers",
        theme="compact",
    ).launch(share=True)
if __name__ == "__main__":
main() |