File size: 3,621 Bytes
64cbc9f
8728fc3
 
 
 
 
 
 
 
 
 
 
64cbc9f
8728fc3
 
29b19df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8728fc3
29b19df
 
8728fc3
29b19df
 
 
8728fc3
29b19df
8728fc3
29b19df
8728fc3
29b19df
8728fc3
29b19df
8728fc3
29b19df
 
 
 
 
 
 
 
 
8728fc3
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import streamlit as st
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_cohere import ChatCohere
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
# Load environment variables from the local .env file before any client is created.
# NOTE(review): presumably this supplies the Cohere API key read by ChatCohere — confirm.
load_dotenv('.env')

# Page heading shown at the top of the Streamlit app.
st.header("MKOM UGM RAG App")

@st.cache_resource
def get_rag_chain():
    """Build the retrieval-augmented generation chain used by the app.

    The listed UGM admission pages are downloaded, reduced to their post
    title/header/content elements, split into overlapping chunks, embedded
    and indexed in a FAISS vector store. A similarity retriever over that
    index is then combined with the prompt template, the Cohere chat model
    and a string output parser.

    The function is decorated with ``st.cache_resource``, so the expensive
    build is meant to happen once rather than on every script rerun.

    Returns:
        A runnable that is invoked with a question string and produces the
        model's answer as a string.
    """

    def join_page_contents(documents):
        # Retrieved chunks are concatenated with a blank line between them.
        return "\n\n".join(document.page_content for document in documents)

    source_urls = (
        "https://um.ugm.ac.id/ragam-seleksi-pascasarjana/",
        "https://um.ugm.ac.id/persyaratan-pendaftaran-magister/",
        "https://um.ugm.ac.id/persyaratan-pendaftaran-program-spesialis/",
        "https://um.ugm.ac.id/persyaratan-pendaftaran-subspesialis/",
        "https://um.ugm.ac.id/persyaratan-pendaftaran-doktor/",
        "https://um.ugm.ac.id/prosedur-pendaftaran-magister/",
        "https://um.ugm.ac.id/prosedur-pendaftaran-program-spesialis/",
        "https://um.ugm.ac.id/prosedur-pendaftaran-program-subspesialis/",
        "https://um.ugm.ac.id/prosedur-pendaftaran-doktor-2/",
        "https://um.ugm.ac.id/program-studi-program-magister-2/",
        "https://um.ugm.ac.id/program-studi-dan-daya-tampung-program-spesialis/",
        "https://um.ugm.ac.id/program-studi-program-doktor/",
        "https://um.ugm.ac.id/jadwal-seleksi-magister-dan-doktor/",
        "https://um.ugm.ac.id/jadwal-kegiatan-seleksi-program-spesialis/",
        "https://mkom.ugm.ac.id/alur-pendaftaran-magister/",
        "https://mkom.ugm.ac.id/informasi-pendaftaran-program-pra-s2-ilmu-komputer/",
        "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s2-magister/",
        "https://mkom.ugm.ac.id/program-dual-degree-double-degree-magister-ilmu-komputer/",
        "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s3-doktor/"
    )

    # Only keep post title, headers, and content from the full HTML.
    html_filter = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))
    pages = WebBaseLoader(
        web_paths=source_urls,
        bs_kwargs={"parse_only": html_filter},
    ).load()

    # 1000-character chunks with 200 characters of overlap; the start index
    # of each chunk is recorded by the splitter.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, add_start_index=True
    )
    chunks = splitter.split_documents(pages)

    # Embed the chunks and index them; each query retrieves the 6 most similar chunks.
    embedder = HuggingFaceEmbeddings(model_name='firqaaa/indo-sentence-bert-base')
    index = FAISS.from_documents(chunks, embedder)
    retriever = index.as_retriever(search_type="similarity", search_kwargs={"k": 6})

    llm = ChatCohere(model="command-r")

    # The prompt text is runtime data sent to the model; keep it exactly as is.
    prompt_text = """Gunakan konteks berikut untuk menjawab pertanyaan pada bagian akhir.
    Jika kamu tidak tahu jawabannya, katakan saja bahwa kamu tidak tahu, jangan mencoba untuk mengarang jawaban.
    Selalu katakan "Terima kasih sudah bertanya!" pada setiap akhir jawaban.

    {context}

    Pertanyaan: {question}

    Jawaban:"""
    prompt = PromptTemplate.from_template(prompt_text)

    # The incoming question is both fed to the retriever (to build the
    # context) and passed through unchanged into the prompt.
    chain_inputs = {
        "context": retriever | join_page_contents,
        "question": RunnablePassthrough(),
    }
    return chain_inputs | prompt | llm | StrOutputParser()

# Obtain the chain; get_rag_chain is wrapped in st.cache_resource, so the
# index is not rebuilt on every Streamlit rerun.
rag_chain = get_rag_chain()

question = st.text_input("Tanya ujian masuk Pascasarjana Universitas Gadjah Mada")
# Strip the input first: a whitespace-only string is truthy and would
# otherwise trigger a retrieval plus an LLM call for a blank query.
query = question.strip() if question else ""
if query:
    response = rag_chain.invoke(query)
    st.write(response)