File size: 18,426 Bytes
794a84e
 
 
 
 
 
 
 
 
 
 
9b1aa73
794a84e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12312f0
794a84e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d55c61d
794a84e
 
9aa3981
794a84e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
#imports
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from googlesearch import search
import google.generativeai as genai
import warnings
import streamlit as st
warnings.filterwarnings("ignore")
#Gemini configuration
# The API key is read from Streamlit secrets (key name "Gemini_api") and used
# to configure the google.generativeai client globally for all models below.
gemini_api_key = st.secrets["Gemini_api"]
genai.configure(api_key=gemini_api_key)
# Disable content-safety blocking for every harm category so that grading,
# ranking, and fusing replies are never withheld by the API.
safety_settings = [
    {
        "category": "HARM_CATEGORY_DANGEROUS",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE",
    },
]
#CRAG
# Path to the local PDF used as the internal knowledge base for retrieval.
Internal_knowledge_base = "lemh201 (2).pdf"
def get_doc(question):
    """
    Return the page content of the internal-knowledge chunk most relevant to *question*.

    The PDF knowledge base is loaded, split into ~1000-token chunks, embedded
    with GPT4All embeddings, indexed in a Chroma store, and the top retrieval
    hit's text is returned.

    Args:
        question (str): Query text used for similarity search.

    Returns:
        str: Page content of the best-matching chunk.
    """
    pages = PyPDFLoader(Internal_knowledge_base).load()

    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    store = Chroma.from_documents(
        documents=chunks,
        collection_name="rag-chroma",
        embedding=GPT4AllEmbeddings(),
    )
    hits = store.as_retriever().get_relevant_documents(question, k=1)
    return hits[0].page_content
def get_prompt_retriever(context, question):
    """
    Build the grading prompt asking an LLM to score document relevance.

    Parameters:
    context (str): The retrieved document text.
    question (str): The user question.

    Returns:
    str: A prompt instructing the model to output only a 0-1 relevance score.
    """
    # Typos in the original prompt ("relavance", "premable", "explaination",
    # duplicated "document") are fixed so the grading instruction reads
    # cleanly to the model.
    return f'''You are a grader assessing relevance of a retrieved document to a user question. \n
    Here is the retrieved document:\n\n {context} \n
    Here is the user question:\n\n {question} \n
    If the document contains keywords related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a score between 0 and 1 to indicate the document is relevant to the question. \n
    Provide the score without any preamble or explanation. \n'''
def get_score(docs, question):
    """
    Grade the relevance of *docs* to *question* with Gemini, returning 0..1.

    Parameters:
    - docs (str): Retrieved document text to grade.
    - question (str): The question being answered.

    Returns:
    - float: Relevance score parsed from the model's reply.

    Raises:
    - ValueError: If no numeric token can be found in the model's reply.
    """
    import re  # local import: only this helper needs regex parsing

    model_score_crag = genai.GenerativeModel('gemini-pro')
    response = model_score_crag.generate_content(get_prompt_retriever(docs, question), safety_settings=safety_settings)
    # ROBUSTNESS FIX: the model may wrap the score in extra text or
    # whitespace; extract the first numeric token instead of float()-ing
    # the raw reply, which raised ValueError on e.g. "Score: 0.8".
    match = re.search(r'\d+(?:\.\d+)?', response.text)
    if match is None:
        raise ValueError(f"No numeric score in model reply: {response.text!r}")
    return float(match.group())
def get_prompt_rewriter(question):
    """
    Build the question-rewriting prompt for the Gemini model.

    Parameters:
    question (str): The original user question.

    Returns:
    str: Prompt instructing the model to rewrite the question clearly
    while preserving its meaning.
    """
    prompt = f'''You are a question rewriter. \n
    Here is the user question:\n\n {question} \n
    Rewrite the question to make it more clear and concise. \n
    At the same time, try to keep the meaning of the question the same. \n
    '''
    return prompt
def rewrite_question(question):
    """
    Rewrite *question* into a clearer, more concise form via Gemini.

    Args:
        question (str): The original question to be rewritten.

    Returns:
        str: The rewritten question text.
    """
    rewriter = genai.GenerativeModel('gemini-pro')
    reply = rewriter.generate_content(get_prompt_rewriter(question), safety_settings=safety_settings)
    return reply.text
def refine_doc(doc, question):
    """
    Split *doc* into small chunks, score each against *question*, and return
    the best ones.

    Args:
        doc (str): The document text to be refined.
        question (str): The question to find relevant chunks for.

    Returns:
        tuple: (best_doc_index, best_doc) where best_doc_index holds the
        indices (into the retrieved chunk list) of the top chunks ordered
        worst-to-best, and best_doc the corresponding Document objects.
    """
    # RESOURCE FIX: use a context manager so the scratch file is closed even
    # if the write raises.
    with open('docs_to_refine.md', 'w', encoding="utf-8") as file:
        file.write(doc)
    loader = TextLoader('docs_to_refine.md', encoding='UTF-8')
    docs_to_refine = loader.load()

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=250, chunk_overlap=50)
    all_splits = text_splitter.split_documents(docs_to_refine)

    embedding = GPT4AllEmbeddings()

    vectorstore = Chroma.from_documents(documents=all_splits, collection_name="rag-chroma", embedding=embedding)
    retriever = vectorstore.as_retriever()

    docs_refined = retriever.get_relevant_documents(question, k=1)

    scores = [get_score(chunk.page_content, question) for chunk in docs_refined]
    # Indices of the (up to) two highest-scoring chunks, ascending by score;
    # with a single chunk this yields a one-element list.
    best_doc_index = sorted(range(len(scores)), key=lambda i: scores[i])[-2:]
    best_doc = [docs_refined[i] for i in best_doc_index]
    return best_doc_index, best_doc
def web_search(query, num_results=5):
    """
    Return result URLs for *query* from a Google web search.

    Args:
        query (str): The search query.
        num_results (int, optional): Number of results to retrieve. Defaults to 5.

    Returns:
        list: Result URLs in the order returned by the search engine.
    """
    return list(search(query, num_results=num_results))
def External_Knowledge(question):
    """
    Fetch web content relevant to *question* and return the best chunk.

    The top web-search result is loaded, split into ~500-token chunks,
    embedded with GPT4All embeddings, indexed in Chroma, and the most
    relevant chunk's text is returned.

    Args:
        question (str): The question to search external sources for.

    Returns:
        str: Page content of the best-matching web chunk.
    """
    top_url = web_search(question)[0]
    pages = WebBaseLoader(top_url).load()

    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    store = Chroma.from_documents(
        documents=chunks,
        collection_name="rag-chroma",
        embedding=GPT4AllEmbeddings(),
    )
    hits = store.as_retriever().get_relevant_documents(question, k=1)
    return hits[0].page_content
def CRAG(question):
    """
    Corrective-RAG context assembly for *question*.

    Retrieves a chunk from the internal knowledge base and grades its
    relevance (0..1). High score (>= 0.7): return the best refined
    sub-chunks. Ambiguous (0.3 < score < 0.7): combine web knowledge with
    the best refined sub-chunk. Low score: fall back to web knowledge only.

    Args:
        question (str): The question to be answered.

    Returns:
        str: Context text assembled from internal and/or external sources.
    """
    docs = get_doc(question)
    score = get_score(docs, question)
    if score >= 0.7:
        _, refined_doc = refine_doc(docs, question)
        # Join the best refined chunks (refine_doc returns them ordered
        # worst-to-best); joining handles the single-chunk case safely
        # where the original's refined_doc[1] raised IndexError.
        return '\n\n'.join(chunk.page_content for chunk in refined_doc)
    elif 0.3 < score < 0.7:
        _, refined_doc = refine_doc(docs, question)
        external_knowledge = External_Knowledge(question)
        # BUG FIX: the original indexed refined_doc with an index into the
        # *unfiltered* chunk list (refined_doc[score_refined_doc[-1]]),
        # which can raise IndexError; the best chunk is simply the last
        # element of the worst-to-best refined list.
        return external_knowledge + '\n\n' + refined_doc[-1].page_content
    else:
        return External_Knowledge(question)
#LLM Blender
def PairRanker(instruction, candidate1, candidate2):
    """
    Ask Gemini to compare two candidate answers for the given instruction.

    Args:
        instruction (str): The task/question both candidates answered.
        candidate1 (str): First candidate answer (shown as "Candidate A").
        candidate2 (str): Second candidate answer (shown as "Candidate B").

    Returns:
        str: The model's verdict, expected to be one of
             "1. A is better", "2. B is better", "3. Same good", "4. Same bad".
    """
    # BUG FIX: the original prompt interpolated ${input}, which embedded the
    # repr of the *builtin* input function ("<built-in function input>") into
    # the prompt; that unused "Input:" section and the stray "$" prefixes
    # (which rendered literally inside the f-string) are removed.
    prompt = f'''
Instruction:
{instruction}
Candidate A:
{candidate1}
Candidate B:
{candidate2}
Given the instruction and candidates above, please compare the two candidates.
You only have 4 choices to output:
If you think A is better, please output: 1. A is better
If you think B is better, please output: 2. B is better
If you think both are good enough correctly give the answer, please output: 3. Same good
If you think both are bad and do not follow the instruction, please output: 4. Same bad
Do not output anything else except the 4 choices above.
Output your choice below:

'''
    model_pair_ranker = genai.GenerativeModel('gemini-pro')
    response = model_pair_ranker.generate_content(prompt, safety_settings=safety_settings)
    return response.text
def GenFuser(instruction, ranking, outputs, top_models=3):
    """
    Fuse the top-ranked candidate outputs into one improved answer via Gemini.

    Args:
        instruction (str): The original task/question.
        ranking (list): Candidate indices ordered best-first.
        outputs (list): Candidate answer texts.
        top_models (int, optional): How many candidates to fuse. Defaults to 3.

    Returns:
        str: The fused answer text.
    """
    prompt = f''' 
    Instruction:{instruction} \n
    The outputs are in order of their quality and correctness. \n
    '''
    for position in range(top_models):
        prompt += f'''\n Output {position+1}: {outputs[ranking[position]]} \n'''
    prompt += '''\n Fuse the outputs above to generate a better output. \n'''
    fuser_model = genai.GenerativeModel('gemini-pro')
    reply = fuser_model.generate_content(prompt, safety_settings=safety_settings)
    return reply.text
def ranking_matrix(instruction, candidates, top_models=3):
    """
    Build a pairwise-comparison matrix over the first *top_models* candidates.

    Entry [i][j] is 1 if candidate i beats candidate j, -1 if it loses,
    and 0 on a tie (or an unparseable verdict).

    Args:
        instruction (str): The task the candidates answered.
        candidates (list): Candidate answer texts.
        top_models (int, optional): Number of candidates compared. Defaults to 3.

    Returns:
        list[list[int]]: top_models x top_models comparison matrix.
    """
    matrix = []
    for i in range(top_models):
        row = []
        for j in range(top_models):
            # Strip whitespace so trailing newlines from the model do not
            # break the exact string comparison.
            verdict = PairRanker(instruction, candidates[i], candidates[j]).strip()
            if verdict == "1. A is better":
                row.append(1)
            elif verdict == "2. B is better":
                row.append(-1)
            else:
                # BUG FIX: an unrecognized reply previously appended nothing,
                # producing a ragged matrix and a later IndexError in
                # rank_model. Treat ties and unexpected replies as a draw.
                row.append(0)
        matrix.append(row)
    return matrix
def rank_model(instruction, candidates, top_models=3):
    """
    Rank candidates by net pairwise wins, best first.

    Args:
        instruction (str): The task the candidates answered.
        candidates (list): Candidate answer texts.
        top_models (int, optional): Number of candidates ranked. Defaults to 3.

    Returns:
        list[int]: Candidate indices ordered from best to worst.
    """
    matrix = ranking_matrix(instruction, candidates, top_models)
    # Net score per candidate: antisymmetrized wins minus losses.
    scores = [
        sum(matrix[i][j] - matrix[j][i] for j in range(top_models))
        for i in range(top_models)
    ]
    ascending = sorted(range(top_models), key=lambda idx: scores[idx])
    return ascending[::-1]
#Self Discovery
# BUG FIX: the original list was missing commas after items 34-38, so
# Python's implicit string concatenation fused modules 34-38 into a single
# list element. Each module is now a separate entry.
REASONING_MODULES = [
    "1. How could I devise an experiment to help solve that problem?",
    "2. Make a list of ideas for solving this problem, and apply them one by one to the problem to see if any progress can be made.",
    "3. How could I measure progress on this problem?",
    "4. How can I simplify the problem so that it is easier to solve?",
    "5. What are the key assumptions underlying this problem?",
    "6. What are the potential risks and drawbacks of each solution?",
    "7. What are the alternative perspectives or viewpoints on this problem?",
    "8. What are the long-term implications of this problem and its solutions?",
    "9. How can I break down this problem into smaller, more manageable parts?",
    "10. Critical Thinking: This style involves analyzing the problem from different perspectives, questioning assumptions, and evaluating the evidence or information available. It focuses on logical reasoning, evidence-based decision-making, and identifying potential biases or flaws in thinking.",
    "11. Try creative thinking, generate innovative and out-of-the-box ideas to solve the problem. Explore unconventional solutions, thinking beyond traditional boundaries, and encouraging imagination and originality.",
    "12. Seek input and collaboration from others to solve the problem. Emphasize teamwork, open communication, and leveraging the diverse perspectives and expertise of a group to come up with effective solutions.",
    "13. Use systems thinking: Consider the problem as part of a larger system and understanding the interconnectedness of various elements. Focuses on identifying the underlying causes, feedback loops, and interdependencies that influence the problem, and developing holistic solutions that address the system as a whole.",
    "14. Use Risk Analysis: Evaluate potential risks, uncertainties, and tradeoffs associated with different solutions or approaches to a problem. Emphasize assessing the potential consequences and likelihood of success or failure, and making informed decisions based on a balanced analysis of risks and benefits.",
    "15. Use Reflective Thinking: Step back from the problem, take the time for introspection and self-reflection. Examine personal biases, assumptions, and mental models that may influence problem-solving, and being open to learning from past experiences to improve future approaches.",
    "16. What is the core issue or problem that needs to be addressed?",
    "17. What are the underlying causes or factors contributing to the problem?",
    "18. Are there any potential solutions or strategies that have been tried before? If yes, what were the outcomes and lessons learned?",
    "19. What are the potential obstacles or challenges that might arise in solving this problem?",
    "20. Are there any relevant data or information that can provide insights into the problem? If yes, what data sources are available, and how can they be analyzed?",
    "21. Are there any stakeholders or individuals who are directly affected by the problem? What are their perspectives and needs?",
    "22. What resources (financial, human, technological, etc.) are needed to tackle the problem effectively?",
    "23. How can progress or success in solving the problem be measured or evaluated?",
    "24. What indicators or metrics can be used?",
    "25. Is the problem a technical or practical one that requires a specific expertise or skill set? Or is it more of a conceptual or theoretical problem?",
    "26. Does the problem involve a physical constraint, such as limited resources, infrastructure, or space?",
    "27. Is the problem related to human behavior, such as a social, cultural, or psychological issue?",
    "28. Does the problem involve decision-making or planning, where choices need to be made under uncertainty or with competing objectives?",
    "29. Is the problem an analytical one that requires data analysis, modeling, or optimization techniques?",
    "30. Is the problem a design challenge that requires creative solutions and innovation?",
    "31. Does the problem require addressing systemic or structural issues rather than just individual instances?",
    "32. Is the problem time-sensitive or urgent, requiring immediate attention and action?",
    "33. What kinds of solution typically are produced for this kind of problem specification?",
    "34. Given the problem specification and the current best solution, have a guess about other possible solutions.",
    "35. Let’s imagine the current best solution is totally wrong, what other ways are there to think about the problem specification?",
    "36. What is the best way to modify this current best solution, given what you know about these kinds of problem specification?",
    "37. Ignoring the current best solution, create an entirely new solution to the problem.",
    "38. Let’s think step by step .",
    "39. Let’s make a step by step plan and implement it with good notation and explanation.",
]

# Collapse the modules into one newline-separated block for prompt embedding.
REASONING_MODULES = "\n".join(REASONING_MODULES)
def select_prompt(task):
    """Build the SELECT-stage prompt: ask which reasoning modules apply to *task*."""
    return (
        f"Given the task: {task}, which of the following reasoning modules are "
        f"relevant? Do not elaborate on why.\n\n {REASONING_MODULES}"
    )
def adapt_prompt(task, selected_modules):
    """Build the ADAPT-stage prompt: tailor the selected modules to *task*."""
    template = (
        "Without working out the full solution, adapt the following reasoning "
        "modules to be specific to our task:\n{mods}\n\nOur task:\n{t}"
    )
    return template.format(mods=selected_modules, t=task)
def implement_prompt(task, adapted_modules):
    """Build the IMPLEMENT-stage prompt: turn adapted modules into a reasoning structure."""
    template = (
        "Without working out the full solution, create an actionable reasoning "
        "structure for the task using these adapted reasoning modules:\n{mods}"
        "\n\nTask Description:\n{t}"
    )
    return template.format(mods=adapted_modules, t=task)
def reasoning_prompt(task, reasoning_structure):
    """Build the final solving prompt from the reasoning structure and task."""
    template = (
        "Using the following reasoning structure: {structure}"
        "\n\nSolve this task, providing your final answer: {t}"
    )
    return template.format(structure=reasoning_structure, t=task)
def SELECT(task):
    """SELECT stage: ask Gemini which reasoning modules suit *task*."""
    selector = genai.GenerativeModel('gemini-pro')
    reply = selector.generate_content(select_prompt(task), safety_settings=safety_settings)
    return reply.text
def ADAPT(task, selected_modules):
    """ADAPT stage: tailor the selected reasoning modules to *task* via Gemini."""
    adapter = genai.GenerativeModel('gemini-pro')
    reply = adapter.generate_content(
        adapt_prompt(task, selected_modules),
        safety_settings=safety_settings,
    )
    return reply.text
def IMPLEMENT(task, adapted_modules):
    """IMPLEMENT stage: build an actionable reasoning structure via Gemini."""
    implementor = genai.GenerativeModel('gemini-pro')
    reply = implementor.generate_content(
        implement_prompt(task, adapted_modules),
        safety_settings=safety_settings,
    )
    return reply.text
def final_ans(model_final_ans, task, reasoning_structure):
    """Solve *task* with the given model using the prepared reasoning structure."""
    reply = model_final_ans.generate_content(
        reasoning_prompt(task, reasoning_structure),
        safety_settings=safety_settings,
    )
    return reply.text
def SelfDiscovery(model_final_ans, task):
    """
    Run the full Self-Discovery pipeline: SELECT -> ADAPT -> IMPLEMENT -> solve.

    Args:
        model_final_ans: Gemini model used for the final answering stage.
        task (str): The task/question to solve.

    Returns:
        str: The final answer text.
    """
    modules = SELECT(task)
    adapted = ADAPT(task, modules)
    structure = IMPLEMENT(task, adapted)
    return final_ans(model_final_ans, task, structure)
def Gemini_3x_blend(question):
    """
    Answer *question* by blending three Gemini runs at different temperatures.

    Pipeline: CRAG assembles retrieval context; three gemini-pro models at
    temperatures 0.9/0.5/0.1 each run Self-Discovery on the context-grounded
    prompt; the candidates are pairwise-ranked and fused into one answer.

    Args:
        question (str): The user's question.

    Returns:
        str: The fused answer text.
    """
    RAG = CRAG(question)
    generation_config_1 = genai.GenerationConfig(temperature=0.9)  # highly creative
    model_1 = genai.GenerativeModel('gemini-pro', generation_config=generation_config_1)
    generation_config_2 = genai.GenerationConfig(temperature=0.5)  # mildly creative
    model_2 = genai.GenerativeModel('gemini-pro', generation_config=generation_config_2)
    generation_config_3 = genai.GenerationConfig(temperature=0.1)  # barely creative
    model_3 = genai.GenerativeModel('gemini-pro', generation_config=generation_config_3)
    prompt = f'''Context:{RAG} \n
Question:{question} \n
Based on the context answer the question. \n
    '''
    # BUG FIX: the original built the context-grounded prompt but then ran
    # Self-Discovery on the bare question, silently discarding the entire
    # CRAG retrieval. The prompt (context + question) is now the task.
    outputs_1 = SelfDiscovery(model_1, prompt)
    outputs_2 = SelfDiscovery(model_2, prompt)
    outputs_3 = SelfDiscovery(model_3, prompt)
    candidates = [outputs_1, outputs_2, outputs_3]
    rank = rank_model(question, candidates)
    return GenFuser(question, rank, candidates)
# Streamlit UI: a single question box plus an "Ask" button.
user_quest = st.text_input("Ask a question:")
btn = st.button("Ask")

if btn and user_quest:
    # Run the full CRAG + Self-Discovery + blending pipeline on the question.
    result = Gemini_3x_blend(user_quest)
    st.subheader("Response : ")
    st.text(result)