Spaces:

oriza
/

climatechat

Sleeping

App Files Files Community

oriza committed on Feb 25, 2024

Commit

7085ded

verified ·

1 Parent(s): c750472

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -22

app.py CHANGED Viewed

@@ -18,7 +18,8 @@ import re
 from PyPDF2 import PdfReader
 #tempat vectordb
-dir = 'data3'
 #embeddings
 embeddings = OpenAIEmbeddings()
@@ -88,7 +89,7 @@ def get_text_chunks(text):
 def get_vectorstore(text_chunks):
     # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
     # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-    vectorstore = Chroma(persist_directory=dir, embedding_function=embeddings)
     return vectorstore
@@ -181,25 +182,25 @@ def main():
         if st.button("Re-Processing New Data"):
             with st.spinner("Processing..."):
                 # BERITA
-                # # Find a CSV files in the directory
-                # sumber = glob.glob("berita/*.csv")
-                # df = pd.read_csv(sumber[0])
-                # banyakBerita = len(df)
-                # print("sumber berita ditemukan")
-                # #update banyak berita txt
-                # with open("banyakBerita.txt", "w") as file:
-                #     file.write(str(banyakBerita))
-                # print("update file text berita berhasil")
-                # #combining and converting
-                # df["combined"] = ""
-                # for row in range(len(df)):
-                #     kombinasi = "berita ke-" + str(row+1) + " \n " + "judul: " + str(df['title'].loc[row]) + " \n " + "link: "+ str(df['url'].loc[row]) + " \n " + "tanggal rilis: " + str(df['datetime'].loc[row]) + " \n " + "penulis: " + str(df['author'].loc[row]) + " \n " + "isi berita: " + str(df['text'].loc[row]) + " \n " + "sumber: " + str(df['source'].loc[row]) + " \n "
-                #     df['combined'].loc[row] = kombinasi
-                # listberita = df["combined"].tolist()
-                # textberita = " ".join(listberita)
-                # print("combining and converting berhasil")
                 # directory ke pdf regulasi
                 folder_path = 'pdf/'
@@ -235,7 +236,6 @@ def main():
                 print("splitting final text berhasil")
                 #save dengan chroma
-                dirsave = "cumandoc"
                 vectorstore = Chroma.from_texts(texts,
                                                 embeddings,
                                                 persist_directory=dirsave)

 from PyPDF2 import PdfReader
 #tempat vectordb
+dirload = '24feb24-openaiv2'
+dirsave = "terbaru"
 #embeddings
 embeddings = OpenAIEmbeddings()
 def get_vectorstore(text_chunks):
     # Returns a Chroma vector store opened on the module-level `dirload`
     # directory, using the module-level `embeddings` as its embedding function.
     # NOTE(review): `text_chunks` is not referenced by the live code below;
     # only the commented-out FAISS alternative used it — confirm callers do
     # not expect the chunks to be indexed by this function.
     # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
     # vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+    vectorstore = Chroma(persist_directory=dirload, embedding_function=embeddings)
     return vectorstore
         if st.button("Re-Processing New Data"):
             with st.spinner("Processing..."):
                 # BERITA
+                # Find a CSV files in the directory
+                sumber = glob.glob("berita/*.csv")
+                df = pd.read_csv(sumber[0])
+                banyakBerita = len(df)
+                print("sumber berita ditemukan")
+                #update banyak berita txt
+                with open("banyakBerita.txt", "w") as file:
+                    file.write(str(banyakBerita))
+                print("update file text berita berhasil")
+                #combining and converting
+                df["combined"] = ""
+                for row in range(len(df)):
+                    kombinasi = "berita ke-" + str(row+1) + " \n " + "judul: " + str(df['title'].loc[row]) + " \n " + "link: "+ str(df['url'].loc[row]) + " \n " + "tanggal rilis: " + str(df['datetime'].loc[row]) + " \n " + "penulis: " + str(df['author'].loc[row]) + " \n " + "isi berita: " + str(df['text'].loc[row]) + " \n " + "sumber: " + str(df['source'].loc[row]) + " \n "
+                    df['combined'].loc[row] = kombinasi
+                listberita = df["combined"].tolist()
+                textberita = " ".join(listberita)
+                print("combining and converting berhasil")
                 # directory ke pdf regulasi
                 folder_path = 'pdf/'
                 print("splitting final text berhasil")
                 #save dengan chroma
                 vectorstore = Chroma.from_texts(texts,
                                                 embeddings,
                                                 persist_directory=dirsave)