oriza committed on
Commit
7085ded
·
verified ·
1 Parent(s): c750472

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -22
app.py CHANGED
@@ -18,7 +18,8 @@ import re
18
  from PyPDF2 import PdfReader
19
 
20
  #tempat vectordb
21
- dir = 'data3'
 
22
 
23
  #embeddings
24
  embeddings = OpenAIEmbeddings()
@@ -88,7 +89,7 @@ def get_text_chunks(text):
88
  def get_vectorstore(text_chunks):
89
  # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
90
  # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
91
- vectorstore = Chroma(persist_directory=dir, embedding_function=embeddings)
92
  return vectorstore
93
 
94
 
@@ -181,25 +182,25 @@ def main():
181
  if st.button("Re-Processing New Data"):
182
  with st.spinner("Processing..."):
183
  # BERITA
184
- # # Find a CSV files in the directory
185
- # sumber = glob.glob("berita/*.csv")
186
- # df = pd.read_csv(sumber[0])
187
- # banyakBerita = len(df)
188
- # print("sumber berita ditemukan")
189
-
190
- # #update banyak berita txt
191
- # with open("banyakBerita.txt", "w") as file:
192
- # file.write(str(banyakBerita))
193
- # print("update file text berita berhasil")
194
-
195
- # #combining and converting
196
- # df["combined"] = ""
197
- # for row in range(len(df)):
198
- # kombinasi = "berita ke-" + str(row+1) + " \n " + "judul: " + str(df['title'].loc[row]) + " \n " + "link: "+ str(df['url'].loc[row]) + " \n " + "tanggal rilis: " + str(df['datetime'].loc[row]) + " \n " + "penulis: " + str(df['author'].loc[row]) + " \n " + "isi berita: " + str(df['text'].loc[row]) + " \n " + "sumber: " + str(df['source'].loc[row]) + " \n "
199
- # df['combined'].loc[row] = kombinasi
200
- # listberita = df["combined"].tolist()
201
- # textberita = " ".join(listberita)
202
- # print("combining and converting berhasil")
203
 
204
  # directory ke pdf regulasi
205
  folder_path = 'pdf/'
@@ -235,7 +236,6 @@ def main():
235
  print("splitting final text berhasil")
236
 
237
  #save dengan chroma
238
- dirsave = "cumandoc"
239
  vectorstore = Chroma.from_texts(texts,
240
  embeddings,
241
  persist_directory=dirsave)
 
18
  from PyPDF2 import PdfReader
19
 
20
  #tempat vectordb
21
+ dirload = '24feb24-openaiv2'
22
+ dirsave = "terbaru"
23
 
24
  #embeddings
25
  embeddings = OpenAIEmbeddings()
 
89
  def get_vectorstore(text_chunks):
90
  # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
91
  # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
92
+ vectorstore = Chroma(persist_directory=dirload, embedding_function=embeddings)
93
  return vectorstore
94
 
95
 
 
182
  if st.button("Re-Processing New Data"):
183
  with st.spinner("Processing..."):
184
  # BERITA
185
+ # Find a CSV files in the directory
186
+ sumber = glob.glob("berita/*.csv")
187
+ df = pd.read_csv(sumber[0])
188
+ banyakBerita = len(df)
189
+ print("sumber berita ditemukan")
190
+
191
+ #update banyak berita txt
192
+ with open("banyakBerita.txt", "w") as file:
193
+ file.write(str(banyakBerita))
194
+ print("update file text berita berhasil")
195
+
196
+ #combining and converting
197
+ df["combined"] = ""
198
+ for row in range(len(df)):
199
+ kombinasi = "berita ke-" + str(row+1) + " \n " + "judul: " + str(df['title'].loc[row]) + " \n " + "link: "+ str(df['url'].loc[row]) + " \n " + "tanggal rilis: " + str(df['datetime'].loc[row]) + " \n " + "penulis: " + str(df['author'].loc[row]) + " \n " + "isi berita: " + str(df['text'].loc[row]) + " \n " + "sumber: " + str(df['source'].loc[row]) + " \n "
200
+ df['combined'].loc[row] = kombinasi
201
+ listberita = df["combined"].tolist()
202
+ textberita = " ".join(listberita)
203
+ print("combining and converting berhasil")
204
 
205
  # directory ke pdf regulasi
206
  folder_path = 'pdf/'
 
236
  print("splitting final text berhasil")
237
 
238
  #save dengan chroma
 
239
  vectorstore = Chroma.from_texts(texts,
240
  embeddings,
241
  persist_directory=dirsave)