mkom_ugm_rag / app.py
robitalhazmi's picture
add cache_resource
29b19df
import streamlit as st
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_cohere import ChatCohere
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
# Read environment variables from the local ".env" file before any client
# that depends on them is constructed further down in this script.
load_dotenv(dotenv_path=".env")

# Page heading rendered at the top of the Streamlit app.
st.header(body="MKOM UGM RAG App")
@st.cache_resource
def get_rag_chain():
    """Build the retrieval-augmented generation chain used by the app.

    The function downloads a fixed set of UGM admission pages, splits them
    into overlapping chunks, indexes the chunks in an in-memory FAISS store
    using a HuggingFace sentence-embedding model, and wires the resulting
    retriever to a Cohere chat model through an Indonesian prompt template.

    It is decorated with ``st.cache_resource`` so that Streamlit can reuse
    the returned object instead of rebuilding it on every script rerun.

    Returns:
        A runnable chain that is invoked with a question string and produces
        the model's answer as a plain string (via ``StrOutputParser``).
    """
    source_urls = (
        "https://um.ugm.ac.id/ragam-seleksi-pascasarjana/",
        "https://um.ugm.ac.id/persyaratan-pendaftaran-magister/",
        "https://um.ugm.ac.id/persyaratan-pendaftaran-program-spesialis/",
        "https://um.ugm.ac.id/persyaratan-pendaftaran-subspesialis/",
        "https://um.ugm.ac.id/persyaratan-pendaftaran-doktor/",
        "https://um.ugm.ac.id/prosedur-pendaftaran-magister/",
        "https://um.ugm.ac.id/prosedur-pendaftaran-program-spesialis/",
        "https://um.ugm.ac.id/prosedur-pendaftaran-program-subspesialis/",
        "https://um.ugm.ac.id/prosedur-pendaftaran-doktor-2/",
        "https://um.ugm.ac.id/program-studi-program-magister-2/",
        "https://um.ugm.ac.id/program-studi-dan-daya-tampung-program-spesialis/",
        "https://um.ugm.ac.id/program-studi-program-doktor/",
        "https://um.ugm.ac.id/jadwal-seleksi-magister-dan-doktor/",
        "https://um.ugm.ac.id/jadwal-kegiatan-seleksi-program-spesialis/",
        "https://mkom.ugm.ac.id/alur-pendaftaran-magister/",
        "https://mkom.ugm.ac.id/informasi-pendaftaran-program-pra-s2-ilmu-komputer/",
        "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s2-magister/",
        "https://mkom.ugm.ac.id/program-dual-degree-double-degree-magister-ilmu-komputer/",
        "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s3-doktor/",
    )

    # Restrict HTML parsing to elements carrying the post title, header and
    # content classes; everything else on the page is not parsed.
    html_filter = bs4.SoupStrainer(
        class_=("post-title", "post-header", "post-content")
    )
    page_loader = WebBaseLoader(
        web_paths=source_urls,
        bs_kwargs={"parse_only": html_filter},
    )
    pages = page_loader.load()

    # 1000-character chunks with a 200-character overlap; each chunk also
    # records its start offset within the source document.
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    chunks = chunker.split_documents(pages)

    embedder = HuggingFaceEmbeddings(model_name='firqaaa/indo-sentence-bert-base')
    index = FAISS.from_documents(chunks, embedder)
    # Similarity search returning the 6 closest chunks per query.
    retriever = index.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 6},
    )

    chat_model = ChatCohere(model="command-r")

    def join_page_contents(retrieved):
        """Concatenate the text of the retrieved documents, blank-line separated."""
        return "\n\n".join([item.page_content for item in retrieved])

    prompt_text = """Gunakan konteks berikut untuk menjawab pertanyaan pada bagian akhir.
Jika kamu tidak tahu jawabannya, katakan saja bahwa kamu tidak tahu, jangan mencoba untuk mengarang jawaban.
Selalu katakan "Terima kasih sudah bertanya!" pada setiap akhir jawaban.
{context}
Pertanyaan: {question}
Jawaban:"""
    prompt = PromptTemplate.from_template(prompt_text)

    # The incoming question feeds both branches: it is passed through
    # unchanged as "question" and used to retrieve the "context" text.
    prompt_inputs = {
        "context": retriever | join_page_contents,
        "question": RunnablePassthrough(),
    }
    pipeline = prompt_inputs | prompt
    pipeline = pipeline | chat_model
    pipeline = pipeline | StrOutputParser()
    return pipeline
# Obtain the RAG chain from the cached factory.
rag_chain = get_rag_chain()

question = st.text_input("Tanya ujian masuk Pascasarjana Universitas Gadjah Mada")

# Treat whitespace-only input the same as empty input: a blank question must
# not trigger a retrieval and an LLM call.
query = question.strip() if question else ""
if query:
    response = rag_chain.invoke(query)
    st.write(response)