# Page-capture residue (Hugging Face "Spaces" header, status: Sleeping) -- not part of the program.
import streamlit as st | |
import bs4 | |
from langchain_community.document_loaders import WebBaseLoader | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain.vectorstores import FAISS | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain_cohere import ChatCohere | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_core.runnables import RunnablePassthrough | |
from langchain_core.prompts import PromptTemplate | |
from dotenv import load_dotenv | |
# Load variables from the local .env file into the process environment
# (presumably the API credentials used by the LLM client -- confirm).
load_dotenv(".env")

# Heading rendered at the top of the Streamlit page.
st.header("MKOM UGM RAG App")
# Admission pages (central UGM admissions site + MKOM department site) that
# are downloaded and indexed as the retrieval corpus.
_SOURCE_URLS = (
    "https://um.ugm.ac.id/ragam-seleksi-pascasarjana/",
    "https://um.ugm.ac.id/persyaratan-pendaftaran-magister/",
    "https://um.ugm.ac.id/persyaratan-pendaftaran-program-spesialis/",
    "https://um.ugm.ac.id/persyaratan-pendaftaran-subspesialis/",
    "https://um.ugm.ac.id/persyaratan-pendaftaran-doktor/",
    "https://um.ugm.ac.id/prosedur-pendaftaran-magister/",
    "https://um.ugm.ac.id/prosedur-pendaftaran-program-spesialis/",
    "https://um.ugm.ac.id/prosedur-pendaftaran-program-subspesialis/",
    "https://um.ugm.ac.id/prosedur-pendaftaran-doktor-2/",
    "https://um.ugm.ac.id/program-studi-program-magister-2/",
    "https://um.ugm.ac.id/program-studi-dan-daya-tampung-program-spesialis/",
    "https://um.ugm.ac.id/program-studi-program-doktor/",
    "https://um.ugm.ac.id/jadwal-seleksi-magister-dan-doktor/",
    "https://um.ugm.ac.id/jadwal-kegiatan-seleksi-program-spesialis/",
    "https://mkom.ugm.ac.id/alur-pendaftaran-magister/",
    "https://mkom.ugm.ac.id/informasi-pendaftaran-program-pra-s2-ilmu-komputer/",
    "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s2-magister/",
    "https://mkom.ugm.ac.id/program-dual-degree-double-degree-magister-ilmu-komputer/",
    "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s3-doktor/",
)

# Only elements carrying one of these CSS classes are parsed out of each page
# (post title, headers, and content); the rest of the HTML is ignored.
_CONTENT_CLASSES = ("post-title", "post-header", "post-content")

# Sentence-embedding model used to vectorise the chunks.
_EMBEDDING_MODEL = "firqaaa/indo-sentence-bert-base"

# Prompt sent to the LLM. Written as adjacent literals so the text is exactly
# the characters of the original triple-quoted template, independent of how
# this file is indented.
# NOTE(review): the captured source shows no blank lines between the
# instructions, the context and the question -- confirm against the deployed
# file whether separator lines were lost.
_PROMPT_TEMPLATE = (
    "Gunakan konteks berikut untuk menjawab pertanyaan pada bagian akhir.\n"
    "Jika kamu tidak tahu jawabannya, katakan saja bahwa kamu tidak tahu, "
    "jangan mencoba untuk mengarang jawaban.\n"
    'Selalu katakan "Terima kasih sudah bertanya!" pada setiap akhir jawaban.\n'
    "{context}\n"
    "Pertanyaan: {question}\n"
    "Jawaban:"
)


def _format_docs(docs):
    """Join the page content of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Streamlit re-executes the whole script on every widget interaction. Without
# caching, each submitted question would re-download every page, re-embed all
# chunks and rebuild the FAISS index. cache_resource keeps the single built
# chain alive for the lifetime of the server process.
@st.cache_resource
def get_rag_chain():
    """Build the retrieval-augmented generation chain for the admission pages.

    Steps: download the pages in ``_SOURCE_URLS`` (keeping only the elements
    whose class is in ``_CONTENT_CLASSES``), split them into 1000-character
    chunks with 200 characters of overlap, embed the chunks into an in-memory
    FAISS index, and wire a top-6 similarity retriever, the prompt and the
    Cohere ``command-r`` chat model together.

    Returns:
        A runnable chain; ``chain.invoke(question)`` takes the question text
        and returns the model's answer parsed to a string.
    """
    page_loader = WebBaseLoader(
        web_paths=_SOURCE_URLS,
        bs_kwargs={"parse_only": bs4.SoupStrainer(class_=_CONTENT_CLASSES)},
    )
    pages = page_loader.load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    chunks = splitter.split_documents(pages)

    embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL)
    vectorstore = FAISS.from_documents(chunks, embeddings)
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 6},
    )

    llm = ChatCohere(model="command-r")
    custom_rag_prompt = PromptTemplate.from_template(_PROMPT_TEMPLATE)

    # The incoming question is fanned out: once through the retriever (to
    # build the context) and once unchanged (as the question slot).
    rag_chain = (
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | custom_rag_prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain
# Obtain the question-answering chain used by the input box below.
rag_chain = get_rag_chain()

# Free-text question box; the label is shown to the user as-is.
question = st.text_input(
    "Tanya ujian masuk Pascasarjana Universitas Gadjah Mada"
)

# Call the chain only when the box holds a non-empty (truthy) value, and
# render the answer directly on the page.
if question:
    st.write(rag_chain.invoke(question))