File size: 18,426 Bytes
794a84e
 
 
 
 
 
 
 
 
 
 
9b1aa73
794a84e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12312f0
794a84e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d55c61d
794a84e
 
9aa3981
794a84e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
#imports
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from googlesearch import search
import google.generativeai as genai
import warnings
import streamlit as st
warnings.filterwarnings("ignore")
#Gemini configuration
# The API key is read from Streamlit secrets (key name "Gemini_api") and used
# to configure the google.generativeai client globally for all models below.
gemini_api_key = st.secrets["Gemini_api"]
genai.configure(api_key=gemini_api_key)
# Disable content-safety blocking for every harm category so that grading,
# ranking, and fusing replies are never withheld by the API.
safety_settings = [
    {
        "category": "HARM_CATEGORY_DANGEROUS",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE",
    },
]
#CRAG
# Path to the local PDF used as the internal knowledge base for retrieval.
Internal_knowledge_base = "lemh201 (2).pdf"
def get_doc(question):
    """
    Return the page content of the internal-knowledge chunk most relevant to *question*.

    The PDF knowledge base is loaded, split into ~1000-token chunks, embedded
    with GPT4All embeddings, indexed in a Chroma store, and the top retrieval
    hit's text is returned.

    Args:
        question (str): Query text used for similarity search.

    Returns:
        str: Page content of the best-matching chunk.
    """
    pages = PyPDFLoader(Internal_knowledge_base).load()

    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    store = Chroma.from_documents(
        documents=chunks,
        collection_name="rag-chroma",
        embedding=GPT4AllEmbeddings(),
    )
    hits = store.as_retriever().get_relevant_documents(question, k=1)
    return hits[0].page_content
def get_prompt_retriever(context, question):
    """
    Build the grading prompt asking an LLM to score document relevance.

    Parameters:
    context (str): The retrieved document text.
    question (str): The user question.

    Returns:
    str: A prompt instructing the model to output only a 0-1 relevance score.
    """
    # Typos in the original prompt ("relavance", "premable", "explaination",
    # duplicated "document") are fixed so the grading instruction reads
    # cleanly to the model.
    return f'''You are a grader assessing relevance of a retrieved document to a user question. \n
    Here is the retrieved document:\n\n {context} \n
    Here is the user question:\n\n {question} \n
    If the document contains keywords related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a score between 0 and 1 to indicate the document is relevant to the question. \n
    Provide the score without any preamble or explanation. \n'''
def get_score(docs, question):
    """
    Grade the relevance of *docs* to *question* with Gemini, returning 0..1.

    Parameters:
    - docs (str): Retrieved document text to grade.
    - question (str): The question being answered.

    Returns:
    - float: Relevance score parsed from the model's reply.

    Raises:
    - ValueError: If no numeric token can be found in the model's reply.
    """
    import re  # local import: only this helper needs regex parsing

    model_score_crag = genai.GenerativeModel('gemini-pro')
    response = model_score_crag.generate_content(get_prompt_retriever(docs, question), safety_settings=safety_settings)
    # ROBUSTNESS FIX: the model may wrap the score in extra text or
    # whitespace; extract the first numeric token instead of float()-ing
    # the raw reply, which raised ValueError on e.g. "Score: 0.8".
    match = re.search(r'\d+(?:\.\d+)?', response.text)
    if match is None:
        raise ValueError(f"No numeric score in model reply: {response.text!r}")
    return float(match.group())
def get_prompt_rewriter(question):
    """
    Build the question-rewriting prompt for the Gemini model.

    Parameters:
    question (str): The original user question.

    Returns:
    str: Prompt instructing the model to rewrite the question clearly
    while preserving its meaning.
    """
    prompt = f'''You are a question rewriter. \n
    Here is the user question:\n\n {question} \n
    Rewrite the question to make it more clear and concise. \n
    At the same time, try to keep the meaning of the question the same. \n
    '''
    return prompt
def rewrite_question(question):
    """
    Rewrite *question* into a clearer, more concise form via Gemini.

    Args:
        question (str): The original question to be rewritten.

    Returns:
        str: The rewritten question text.
    """
    rewriter = genai.GenerativeModel('gemini-pro')
    reply = rewriter.generate_content(get_prompt_rewriter(question), safety_settings=safety_settings)
    return reply.text
def refine_doc(doc, question):
    """
    Split *doc* into small chunks, score each against *question*, and return
    the best ones.

    Args:
        doc (str): The document text to be refined.
        question (str): The question to find relevant chunks for.

    Returns:
        tuple: (best_doc_index, best_doc) where best_doc_index holds the
        indices (into the retrieved chunk list) of the top chunks ordered
        worst-to-best, and best_doc the corresponding Document objects.
    """
    # RESOURCE FIX: use a context manager so the scratch file is closed even
    # if the write raises.
    with open('docs_to_refine.md', 'w', encoding="utf-8") as file:
        file.write(doc)
    loader = TextLoader('docs_to_refine.md', encoding='UTF-8')
    docs_to_refine = loader.load()

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=250, chunk_overlap=50)
    all_splits = text_splitter.split_documents(docs_to_refine)

    embedding = GPT4AllEmbeddings()

    vectorstore = Chroma.from_documents(documents=all_splits, collection_name="rag-chroma", embedding=embedding)
    retriever = vectorstore.as_retriever()

    docs_refined = retriever.get_relevant_documents(question, k=1)

    scores = [get_score(chunk.page_content, question) for chunk in docs_refined]
    # Indices of the (up to) two highest-scoring chunks, ascending by score;
    # with a single chunk this yields a one-element list.
    best_doc_index = sorted(range(len(scores)), key=lambda i: scores[i])[-2:]
    best_doc = [docs_refined[i] for i in best_doc_index]
    return best_doc_index, best_doc
def web_search(query, num_results=5):
    """
    Return result URLs for *query* from a Google web search.

    Args:
        query (str): The search query.
        num_results (int, optional): Number of results to retrieve. Defaults to 5.

    Returns:
        list: Result URLs in the order returned by the search engine.
    """
    return list(search(query, num_results=num_results))
def External_Knowledge(question):
    """
    Fetch web content relevant to *question* and return the best chunk.

    The top web-search result is loaded, split into ~500-token chunks,
    embedded with GPT4All embeddings, indexed in Chroma, and the most
    relevant chunk's text is returned.

    Args:
        question (str): The question to search external sources for.

    Returns:
        str: Page content of the best-matching web chunk.
    """
    top_url = web_search(question)[0]
    pages = WebBaseLoader(top_url).load()

    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    store = Chroma.from_documents(
        documents=chunks,
        collection_name="rag-chroma",
        embedding=GPT4AllEmbeddings(),
    )
    hits = store.as_retriever().get_relevant_documents(question, k=1)
    return hits[0].page_content
def CRAG(question):
    """
    Corrective-RAG context assembly for *question*.

    Retrieves a chunk from the internal knowledge base and grades its
    relevance (0..1). High score (>= 0.7): return the best refined
    sub-chunks. Ambiguous (0.3 < score < 0.7): combine web knowledge with
    the best refined sub-chunk. Low score: fall back to web knowledge only.

    Args:
        question (str): The question to be answered.

    Returns:
        str: Context text assembled from internal and/or external sources.
    """
    docs = get_doc(question)
    score = get_score(docs, question)
    if score >= 0.7:
        _, refined_doc = refine_doc(docs, question)
        # Join the best refined chunks (refine_doc returns them ordered
        # worst-to-best); joining handles the single-chunk case safely
        # where the original's refined_doc[1] raised IndexError.
        return '\n\n'.join(chunk.page_content for chunk in refined_doc)
    elif 0.3 < score < 0.7:
        _, refined_doc = refine_doc(docs, question)
        external_knowledge = External_Knowledge(question)
        # BUG FIX: the original indexed refined_doc with an index into the
        # *unfiltered* chunk list (refined_doc[score_refined_doc[-1]]),
        # which can raise IndexError; the best chunk is simply the last
        # element of the worst-to-best refined list.
        return external_knowledge + '\n\n' + refined_doc[-1].page_content
    else:
        return External_Knowledge(question)
#LLM Blender
def PairRanker(instruction, candidate1, candidate2):
    """
    Ask Gemini to compare two candidate answers for the given instruction.

    Args:
        instruction (str): The task/question both candidates answered.
        candidate1 (str): First candidate answer (shown as "Candidate A").
        candidate2 (str): Second candidate answer (shown as "Candidate B").

    Returns:
        str: The model's verdict, expected to be one of
             "1. A is better", "2. B is better", "3. Same good", "4. Same bad".
    """
    # BUG FIX: the original prompt interpolated ${input}, which embedded the
    # repr of the *builtin* input function ("<built-in function input>") into
    # the prompt; that unused "Input:" section and the stray "$" prefixes
    # (which rendered literally inside the f-string) are removed.
    prompt = f'''
Instruction:
{instruction}
Candidate A:
{candidate1}
Candidate B:
{candidate2}
Given the instruction and candidates above, please compare the two candidates.
You only have 4 choices to output:
If you think A is better, please output: 1. A is better
If you think B is better, please output: 2. B is better
If you think both are good enough correctly give the answer, please output: 3. Same good
If you think both are bad and do not follow the instruction, please output: 4. Same bad
Do not output anything else except the 4 choices above.
Output your choice below:

'''
    model_pair_ranker = genai.GenerativeModel('gemini-pro')
    response = model_pair_ranker.generate_content(prompt, safety_settings=safety_settings)
    return response.text
def GenFuser(instruction, ranking, outputs, top_models=3):
    """
    Fuse the top-ranked candidate outputs into one improved answer via Gemini.

    Args:
        instruction (str): The original task/question.
        ranking (list): Candidate indices ordered best-first.
        outputs (list): Candidate answer texts.
        top_models (int, optional): How many candidates to fuse. Defaults to 3.

    Returns:
        str: The fused answer text.
    """
    prompt = f''' 
    Instruction:{instruction} \n
    The outputs are in order of their quality and correctness. \n
    '''
    for position in range(top_models):
        prompt += f'''\n Output {position+1}: {outputs[ranking[position]]} \n'''
    prompt += '''\n Fuse the outputs above to generate a better output. \n'''
    fuser_model = genai.GenerativeModel('gemini-pro')
    reply = fuser_model.generate_content(prompt, safety_settings=safety_settings)
    return reply.text
def ranking_matrix(instruction, candidates, top_models=3):
    """
    Build a pairwise-comparison matrix over the first *top_models* candidates.

    Entry [i][j] is 1 if candidate i beats candidate j, -1 if it loses,
    and 0 on a tie (or an unparseable verdict).

    Args:
        instruction (str): The task the candidates answered.
        candidates (list): Candidate answer texts.
        top_models (int, optional): Number of candidates compared. Defaults to 3.

    Returns:
        list[list[int]]: top_models x top_models comparison matrix.
    """
    matrix = []
    for i in range(top_models):
        row = []
        for j in range(top_models):
            # Strip whitespace so trailing newlines from the model do not
            # break the exact string comparison.
            verdict = PairRanker(instruction, candidates[i], candidates[j]).strip()
            if verdict == "1. A is better":
                row.append(1)
            elif verdict == "2. B is better":
                row.append(-1)
            else:
                # BUG FIX: an unrecognized reply previously appended nothing,
                # producing a ragged matrix and a later IndexError in
                # rank_model. Treat ties and unexpected replies as a draw.
                row.append(0)
        matrix.append(row)
    return matrix
def rank_model(instruction, candidates, top_models=3):
    """
    Rank candidates by net pairwise wins, best first.

    Args:
        instruction (str): The task the candidates answered.
        candidates (list): Candidate answer texts.
        top_models (int, optional): Number of candidates ranked. Defaults to 3.

    Returns:
        list[int]: Candidate indices ordered from best to worst.
    """
    matrix = ranking_matrix(instruction, candidates, top_models)
    # Net score per candidate: antisymmetrized wins minus losses.
    scores = [
        sum(matrix[i][j] - matrix[j][i] for j in range(top_models))
        for i in range(top_models)
    ]
    ascending = sorted(range(top_models), key=lambda idx: scores[idx])
    return ascending[::-1]
#Self Discovery
# BUG FIX: the original list was missing commas after items 34-38, so
# Python's implicit string concatenation fused modules 34-38 into a single
# list element. Each module is now a separate entry.
REASONING_MODULES = [
    "1. How could I devise an experiment to help solve that problem?",
    "2. Make a list of ideas for solving this problem, and apply them one by one to the problem to see if any progress can be made.",
    "3. How could I measure progress on this problem?",
    "4. How can I simplify the problem so that it is easier to solve?",
    "5. What are the key assumptions underlying this problem?",
    "6. What are the potential risks and drawbacks of each solution?",
    "7. What are the alternative perspectives or viewpoints on this problem?",
    "8. What are the long-term implications of this problem and its solutions?",
    "9. How can I break down this problem into smaller, more manageable parts?",
    "10. Critical Thinking: This style involves analyzing the problem from different perspectives, questioning assumptions, and evaluating the evidence or information available. It focuses on logical reasoning, evidence-based decision-making, and identifying potential biases or flaws in thinking.",
    "11. Try creative thinking, generate innovative and out-of-the-box ideas to solve the problem. Explore unconventional solutions, thinking beyond traditional boundaries, and encouraging imagination and originality.",
    "12. Seek input and collaboration from others to solve the problem. Emphasize teamwork, open communication, and leveraging the diverse perspectives and expertise of a group to come up with effective solutions.",
    "13. Use systems thinking: Consider the problem as part of a larger system and understanding the interconnectedness of various elements. Focuses on identifying the underlying causes, feedback loops, and interdependencies that influence the problem, and developing holistic solutions that address the system as a whole.",
    "14. Use Risk Analysis: Evaluate potential risks, uncertainties, and tradeoffs associated with different solutions or approaches to a problem. Emphasize assessing the potential consequences and likelihood of success or failure, and making informed decisions based on a balanced analysis of risks and benefits.",
    "15. Use Reflective Thinking: Step back from the problem, take the time for introspection and self-reflection. Examine personal biases, assumptions, and mental models that may influence problem-solving, and being open to learning from past experiences to improve future approaches.",
    "16. What is the core issue or problem that needs to be addressed?",
    "17. What are the underlying causes or factors contributing to the problem?",
    "18. Are there any potential solutions or strategies that have been tried before? If yes, what were the outcomes and lessons learned?",
    "19. What are the potential obstacles or challenges that might arise in solving this problem?",
    "20. Are there any relevant data or information that can provide insights into the problem? If yes, what data sources are available, and how can they be analyzed?",
    "21. Are there any stakeholders or individuals who are directly affected by the problem? What are their perspectives and needs?",
    "22. What resources (financial, human, technological, etc.) are needed to tackle the problem effectively?",
    "23. How can progress or success in solving the problem be measured or evaluated?",
    "24. What indicators or metrics can be used?",
    "25. Is the problem a technical or practical one that requires a specific expertise or skill set? Or is it more of a conceptual or theoretical problem?",
    "26. Does the problem involve a physical constraint, such as limited resources, infrastructure, or space?",
    "27. Is the problem related to human behavior, such as a social, cultural, or psychological issue?",
    "28. Does the problem involve decision-making or planning, where choices need to be made under uncertainty or with competing objectives?",
    "29. Is the problem an analytical one that requires data analysis, modeling, or optimization techniques?",
    "30. Is the problem a design challenge that requires creative solutions and innovation?",
    "31. Does the problem require addressing systemic or structural issues rather than just individual instances?",
    "32. Is the problem time-sensitive or urgent, requiring immediate attention and action?",
    "33. What kinds of solution typically are produced for this kind of problem specification?",
    "34. Given the problem specification and the current best solution, have a guess about other possible solutions.",
    "35. Let’s imagine the current best solution is totally wrong, what other ways are there to think about the problem specification?",
    "36. What is the best way to modify this current best solution, given what you know about these kinds of problem specification?",
    "37. Ignoring the current best solution, create an entirely new solution to the problem.",
    "38. Let’s think step by step .",
    "39. Let’s make a step by step plan and implement it with good notation and explanation.",
]

# Collapse the modules into one newline-separated block for prompt embedding.
REASONING_MODULES = "\n".join(REASONING_MODULES)
def select_prompt(task):
    """Build the SELECT-stage prompt: ask which reasoning modules apply to *task*."""
    return (
        f"Given the task: {task}, which of the following reasoning modules are "
        f"relevant? Do not elaborate on why.\n\n {REASONING_MODULES}"
    )
def adapt_prompt(task, selected_modules):
    """Build the ADAPT-stage prompt: tailor the selected modules to *task*."""
    template = (
        "Without working out the full solution, adapt the following reasoning "
        "modules to be specific to our task:\n{mods}\n\nOur task:\n{t}"
    )
    return template.format(mods=selected_modules, t=task)
def implement_prompt(task, adapted_modules):
    """Build the IMPLEMENT-stage prompt: turn adapted modules into a reasoning structure."""
    template = (
        "Without working out the full solution, create an actionable reasoning "
        "structure for the task using these adapted reasoning modules:\n{mods}"
        "\n\nTask Description:\n{t}"
    )
    return template.format(mods=adapted_modules, t=task)
def reasoning_prompt(task, reasoning_structure):
    """Build the final solving prompt from the reasoning structure and task."""
    template = (
        "Using the following reasoning structure: {structure}"
        "\n\nSolve this task, providing your final answer: {t}"
    )
    return template.format(structure=reasoning_structure, t=task)
def SELECT(task):
    """SELECT stage: ask Gemini which reasoning modules suit *task*."""
    selector = genai.GenerativeModel('gemini-pro')
    reply = selector.generate_content(select_prompt(task), safety_settings=safety_settings)
    return reply.text
def ADAPT(task, selected_modules):
    """ADAPT stage: tailor the selected reasoning modules to *task* via Gemini."""
    adapter = genai.GenerativeModel('gemini-pro')
    reply = adapter.generate_content(
        adapt_prompt(task, selected_modules),
        safety_settings=safety_settings,
    )
    return reply.text
def IMPLEMENT(task, adapted_modules):
    """IMPLEMENT stage: build an actionable reasoning structure via Gemini."""
    implementor = genai.GenerativeModel('gemini-pro')
    reply = implementor.generate_content(
        implement_prompt(task, adapted_modules),
        safety_settings=safety_settings,
    )
    return reply.text
def final_ans(model_final_ans, task, reasoning_structure):
    """Solve *task* with the given model using the prepared reasoning structure."""
    reply = model_final_ans.generate_content(
        reasoning_prompt(task, reasoning_structure),
        safety_settings=safety_settings,
    )
    return reply.text
def SelfDiscovery(model_final_ans, task):
    """
    Run the full Self-Discovery pipeline: SELECT -> ADAPT -> IMPLEMENT -> solve.

    Args:
        model_final_ans: Gemini model used for the final answering stage.
        task (str): The task/question to solve.

    Returns:
        str: The final answer text.
    """
    modules = SELECT(task)
    adapted = ADAPT(task, modules)
    structure = IMPLEMENT(task, adapted)
    return final_ans(model_final_ans, task, structure)
def Gemini_3x_blend(question):
    """
    Answer *question* by blending three Gemini runs at different temperatures.

    Pipeline: CRAG assembles retrieval context; three gemini-pro models at
    temperatures 0.9/0.5/0.1 each run Self-Discovery on the context-grounded
    prompt; the candidates are pairwise-ranked and fused into one answer.

    Args:
        question (str): The user's question.

    Returns:
        str: The fused answer text.
    """
    RAG = CRAG(question)
    generation_config_1 = genai.GenerationConfig(temperature=0.9)  # highly creative
    model_1 = genai.GenerativeModel('gemini-pro', generation_config=generation_config_1)
    generation_config_2 = genai.GenerationConfig(temperature=0.5)  # mildly creative
    model_2 = genai.GenerativeModel('gemini-pro', generation_config=generation_config_2)
    generation_config_3 = genai.GenerationConfig(temperature=0.1)  # barely creative
    model_3 = genai.GenerativeModel('gemini-pro', generation_config=generation_config_3)
    prompt = f'''Context:{RAG} \n
Question:{question} \n
Based on the context answer the question. \n
    '''
    # BUG FIX: the original built the context-grounded prompt but then ran
    # Self-Discovery on the bare question, silently discarding the entire
    # CRAG retrieval. The prompt (context + question) is now the task.
    outputs_1 = SelfDiscovery(model_1, prompt)
    outputs_2 = SelfDiscovery(model_2, prompt)
    outputs_3 = SelfDiscovery(model_3, prompt)
    candidates = [outputs_1, outputs_2, outputs_3]
    rank = rank_model(question, candidates)
    return GenFuser(question, rank, candidates)
# Streamlit UI: a single question box plus an "Ask" button.
user_quest = st.text_input("Ask a question:")
btn = st.button("Ask")

if btn and user_quest:
    # Run the full CRAG + Self-Discovery + blending pipeline on the question.
    result = Gemini_3x_blend(user_quest)
    st.subheader("Response : ")
    st.text(result)