BramLeo committed on
Commit
cf05cdf
·
verified ·
1 Parent(s): 6c29747

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -22
app.py CHANGED
@@ -3,43 +3,70 @@ import gspread
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
5
  from oauth2client.service_account import ServiceAccountCredentials
 
 
 
 
6
 
7
- # =============== 1. Cache Google Sheets ===============
8
- cached_text_data = None
9
 
10
def search_google_sheets(user_query):
    """Return every spreadsheet row that contains ``user_query``.

    Authorizes against Google Sheets with the service-account key in
    ``credentials.json``, walks a fixed list of worksheets, and joins each
    row's cells with ``" | "``. A row matches when the lower-cased query is
    a substring of the lower-cased joined row.

    Args:
        user_query: Text to look for (compared case-insensitively).

    Returns:
        The matching rows joined by newlines, a fixed "nothing found"
        message when no row matches, or a string starting with
        ``"❌ ERROR: "`` when any exception is raised along the way.
    """
    try:
        api_scopes = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive",
        ]
        credentials = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", api_scopes)
        gsheets = gspread.authorize(credentials)

        workbook_key = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
        tab_titles = ["datatarget", "datacuti", "dataabsen", "datalembur", "pkb"]

        hits = []
        workbook = gsheets.open_by_key(workbook_key)

        for title in tab_titles:
            rows = workbook.worksheet(title).get_all_values()
            joined_rows = (" | ".join(cells) for cells in rows)
            # Case-insensitive substring match against the whole joined row.
            hits.extend(
                line for line in joined_rows
                if user_query.lower() in line.lower()
            )

        if hits:
            return "\n".join(hits)
        return "Maaf, saya tidak menemukan informasi yang relevan."
    except Exception as e:
        # Errors are reported to the caller as text rather than raised.
        return f"❌ ERROR: {str(e)}"
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  # =============== 2. Load Model Transformers ===============
36
  def load_model():
37
- model_id = "HuggingFaceH4/zephyr-7b-beta" # Bisa ganti ke Zephyr juga
38
  tokenizer = AutoTokenizer.from_pretrained(model_id)
39
  model = AutoModelForCausalLM.from_pretrained(
40
  model_id,
41
- device_map="auto", # ✅ Otomatis ke GPU
42
- torch_dtype=torch.float16 # Ganti ke bfloat16 jika float16 bermasalah
43
  )
44
  pipe = pipeline(
45
  "text-generation",
@@ -52,7 +79,6 @@ def load_model():
52
  )
53
  return pipe
54
 
55
-
56
  # =============== 3. Buat Prompt dan Jawaban ===============
57
  def generate_prompt(user_message, context_data):
58
  prompt = f"""
@@ -66,10 +92,9 @@ Anda adalah chatbot HRD yang membantu karyawan memahami administrasi perusahaan.
66
  """
67
  return prompt.strip()
68
 
69
-
70
  # =============== 4. Generate Response ===============
71
  def generate_response(message, history, pipe):
72
- context = search_google_sheets(message) # 🔍 Cari hanya bagian yang relevan
73
  full_prompt = generate_prompt(message, context)
74
  response = pipe(full_prompt)[0]["generated_text"]
75
 
@@ -81,6 +106,7 @@ def generate_response(message, history, pipe):
81
  # =============== 5. Jalankan Gradio ===============
82
  def main():
83
  pipe = load_model()
 
84
 
85
  def chatbot_response(message, history):
86
  return generate_response(message, history, pipe)
 
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
5
  from oauth2client.service_account import ServiceAccountCredentials
6
+ from llama_index.core import VectorStoreIndex, Settings
7
+ from llama_index.core.node_parser import SentenceSplitter
8
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
9
+ from llama_index.core.schema import Document
10
 
11
+ # =============== 1. Cache dan Inisialisasi Index Google Sheets ===============
12
+ cached_index = None
13
 
14
def read_google_sheets():
    """Dump a fixed set of worksheets into one block of text.

    Authorizes with the service-account key in ``credentials.json`` and
    opens the spreadsheet by its hard-coded key. For each configured
    worksheet it emits a ``=== Data dari <NAME> ===`` header, one line per
    row (cells joined with ``" | "``), and a trailing ``"\\n"`` entry.
    A worksheet that does not exist contributes a single error line
    instead of aborting the whole read.

    Returns:
        The collected lines joined by newlines and stripped, or a string
        starting with ``"❌ ERROR: "`` when any other exception is raised.
    """
    try:
        api_scopes = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive",
        ]
        credentials = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", api_scopes)
        gsheets = gspread.authorize(credentials)

        workbook_key = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
        tab_titles = ["datatarget", "datacuti", "dataabsen", "datalembur", "pkb"]

        lines = []
        workbook = gsheets.open_by_key(workbook_key)

        for title in tab_titles:
            try:
                rows = workbook.worksheet(title).get_all_values()
            except gspread.exceptions.WorksheetNotFound:
                # A missing tab is recorded inline; the remaining tabs are still read.
                lines.append(f"❌ ERROR: Worksheet {title} tidak ditemukan.")
            else:
                lines.append(f"=== Data dari {title.upper()} ===")
                for cells in rows:
                    lines.append(" | ".join(cells))
                lines.append("\n")

        combined = "\n".join(lines)
        return combined.strip()
    except Exception as e:
        # Any other failure is reported to the caller as text rather than raised.
        return f"❌ ERROR: {str(e)}"
39
 
40
def initialize_index():
    """Build the vector index over the spreadsheet text and cache it.

    Reads the text via ``read_google_sheets()``, wraps it in a single
    ``Document``, splits it with ``SentenceSplitter`` (chunk size 100,
    overlap 30), installs a multilingual MiniLM embedding model as the
    global ``Settings.embed_model``, and stores the resulting
    ``VectorStoreIndex`` in the module-level ``cached_index``.

    Whatever string ``read_google_sheets()`` returns is indexed as-is,
    including its ``"❌ ERROR: ..."`` text when reading failed.
    """
    global cached_index

    sheet_text = read_google_sheets()
    splitter = SentenceSplitter(chunk_size=100, chunk_overlap=30)
    chunks = splitter.get_nodes_from_documents([Document(text=sheet_text)])

    # The embedding model is set globally before the index is constructed.
    Settings.embed_model = HuggingFaceEmbedding(
        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )

    cached_index = VectorStoreIndex(chunks)
51
+
52
def search_google_sheets_vector(query):
    """Return the text of the top three index chunks retrieved for ``query``.

    Builds the index first via ``initialize_index()`` when the module-level
    ``cached_index`` is still ``None``.

    Args:
        query: The user's question, passed unchanged to the retriever.

    Returns:
        The retrieved chunk texts joined by newlines, or a fixed
        "nothing found" message when the retriever returns nothing.
    """
    if cached_index is None:
        initialize_index()

    top_hits = cached_index.as_retriever(similarity_top_k=3).retrieve(query)
    snippets = [hit.text for hit in top_hits]

    if not snippets:
        return "Maaf, saya tidak menemukan informasi yang relevan."
    return "\n".join(snippets)
61
+
62
  # =============== 2. Load Model Transformers ===============
63
  def load_model():
64
+ model_id = "HuggingFaceH4/zephyr-7b-beta"
65
  tokenizer = AutoTokenizer.from_pretrained(model_id)
66
  model = AutoModelForCausalLM.from_pretrained(
67
  model_id,
68
+ device_map="auto",
69
+ torch_dtype=torch.float16
70
  )
71
  pipe = pipeline(
72
  "text-generation",
 
79
  )
80
  return pipe
81
 
 
82
  # =============== 3. Buat Prompt dan Jawaban ===============
83
  def generate_prompt(user_message, context_data):
84
  prompt = f"""
 
92
  """
93
  return prompt.strip()
94
 
 
95
  # =============== 4. Generate Response ===============
96
  def generate_response(message, history, pipe):
97
+ context = search_google_sheets_vector(message) # 🔍 Pencarian berbasis vektor
98
  full_prompt = generate_prompt(message, context)
99
  response = pipe(full_prompt)[0]["generated_text"]
100
 
 
106
  # =============== 5. Jalankan Gradio ===============
107
  def main():
108
  pipe = load_model()
109
+ initialize_index() # 🔹 Inisialisasi index sebelum chatbot berjalan
110
 
111
  def chatbot_response(message, history):
112
  return generate_response(message, history, pipe)