# NOTE(review): the lines previously here ("Spaces:", "Sleeping", a file-size
# banner, commit hashes, and a line-number gutter) were artifacts of scraping
# this file from a web UI and were not valid Python; they have been removed.
# imports
import os
import warnings

import google.generativeai as genai
import streamlit as st
from googlesearch import search
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
# Silence library deprecation chatter (intentional best-effort suppression).
warnings.filterwarnings("ignore")

# --- Gemini configuration ---
# SECURITY: the API key was hard-coded here. Prefer the GEMINI_API_KEY
# environment variable; the literal fallback is kept only for backward
# compatibility and should be removed (and the exposed key revoked).
gemini_api_key = os.environ.get("GEMINI_API_KEY", "AIzaSyCdMEDGRTlN7_camesAFg6z0ygRK5fCHvs")
genai.configure(api_key=gemini_api_key)

# Disable every content filter so the grading/rewriting prompts are never
# blocked by the safety layer. Built from one tuple instead of five
# copy-pasted dicts.
_BLOCKED_CATEGORIES = (
    "HARM_CATEGORY_DANGEROUS",
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
)
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in _BLOCKED_CATEGORIES
]
# --- CRAG (Corrective Retrieval-Augmented Generation) ---
# Path to the local PDF used as the internal knowledge base for retrieval.
Internal_knowledge_base = "lemh201 (2).pdf"
def get_doc(question):
    """
    Retrieve the most relevant page content from the internal PDF knowledge base.

    Args:
        question (str): The question to search for in the document.

    Returns:
        str: The page content of the most relevant document chunk.
    """
    # Load the internal PDF and split it into ~1000-token chunks.
    pages = PyPDFLoader(Internal_knowledge_base).load()
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1000, chunk_overlap=50
    )
    chunks = splitter.split_documents(pages)
    # Index the chunks in Chroma and retrieve the single best match.
    store = Chroma.from_documents(
        documents=chunks,
        collection_name="rag-chroma",
        embedding=GPT4AllEmbeddings(),
    )
    matches = store.as_retriever().get_relevant_documents(question, k=1)
    return matches[0].page_content
def get_prompt_retriever(context, question):
    """
    Build the grading prompt that asks the LLM to score a retrieved
    document's relevance to a user question.

    Parameters:
        context (str): The retrieved document.
        question (str): The user question.

    Returns:
        str: The grading prompt, including the retrieved document, the user
        question, and the scoring instructions.
    """
    # Spelling errors in the original prompt ("relavance", "document
    # document", "retrievels", "premable", "explaination") were fixed so the
    # grading model receives clear instructions.
    return f'''You are a grader assessing relevance of a retrieved document to a user question. \n
Here is the retrieved document:\n\n {context} \n
Here is the user question:\n\n {question} \n
If the document contains keywords related to the user question, grade it as relevant. \n
It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
Give a score between 0 and 1 to indicate the document is relevant to the question. \n
Provide the score without any preamble or explanation. \n'''
def get_score(docs, question):
    """
    Score the relevance of the given document text to a question via Gemini.

    Parameters:
        docs (str): Document text to grade.
        question (str): Question to grade against.

    Returns:
        float: Relevance score between 0 and 1, parsed from the model reply.
    """
    grader = genai.GenerativeModel('gemini-pro')
    prompt = get_prompt_retriever(docs, question)
    reply = grader.generate_content(prompt, safety_settings=safety_settings)
    return float(reply.text)
def get_prompt_rewriter(question):
    """
    Build the prompt asking the LLM to rewrite a user question.

    Parameters:
        question (str): The original user question.

    Returns:
        str: The rewriting prompt.
    """
    prompt = f'''You are a question rewriter. \n
Here is the user question:\n\n {question} \n
Rewrite the question to make it more clear and concise. \n
At the same time, try to keep the meaning of the question the same. \n
'''
    return prompt
def rewrite_question(question):
    """
    Rewrite the given question for clarity using the Gemini API.

    Args:
        question (str): The original question to be rewritten.

    Returns:
        str: The rewritten question produced by the model.
    """
    rewriter = genai.GenerativeModel('gemini-pro')
    result = rewriter.generate_content(
        get_prompt_rewriter(question), safety_settings=safety_settings
    )
    return result.text
def refine_doc(doc, question):
    """
    Refine a document: split it into small chunks, embed them, retrieve the
    chunks most relevant to the question, and grade each retrieved chunk.

    Args:
        doc (str): The document text to be refined.
        question (str): The question used to rank chunks.

    Returns:
        tuple: (best_doc_index, best_doc) where best_doc_index holds the
        indices (in ascending score order) of the up-to-two best chunks, and
        best_doc holds the corresponding chunk documents.
    """
    # Round-trip the text through a file so TextLoader can ingest it.
    # FIX: use a context manager so the handle is closed even if write fails.
    with open('docs_to_refine.md', 'w', encoding="utf-8") as fh:
        fh.write(doc)
    docs_to_refine = TextLoader('docs_to_refine.md', encoding='UTF-8').load()
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=250, chunk_overlap=50
    )
    all_splits = splitter.split_documents(docs_to_refine)
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        collection_name="rag-chroma",
        embedding=GPT4AllEmbeddings(),
    )
    retriever = vectorstore.as_retriever()
    docs_refined = retriever.get_relevant_documents(question, k=1)
    # Grade every retrieved chunk against the question.
    scores = [get_score(chunk.page_content, question) for chunk in docs_refined]
    # Indices of the (up to) two highest-scoring chunks, ascending by score.
    best_doc_index = sorted(range(len(scores)), key=lambda i: scores[i])[-2:]
    best_doc = [docs_refined[i] for i in best_doc_index]
    return best_doc_index, best_doc
def web_search(query, num_results=5):
    """
    Perform a Google web search and return the result URLs.

    Args:
        query (str): The search query.
        num_results (int, optional): Number of results to retrieve. Defaults to 5.

    Returns:
        list: The search results, in the order the search engine returned them.
    """
    # search() yields results lazily; materialize them into a list.
    return list(search(query, num_results=num_results))
def External_Knowledge(question):
    """
    Fetch external knowledge for a question from the top web search result.

    Args:
        question (str): The question to research on the web.

    Returns:
        str: The page content of the most relevant chunk of the top result.
    """
    # Scrape the first search hit and split it into ~500-token chunks.
    top_url = web_search(question)[0]
    pages = WebBaseLoader(top_url).load()
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=500, chunk_overlap=50
    )
    chunks = splitter.split_documents(pages)
    # Index and retrieve the single most relevant chunk.
    store = Chroma.from_documents(
        documents=chunks,
        collection_name="rag-chroma",
        embedding=GPT4AllEmbeddings(),
    )
    best = store.as_retriever().get_relevant_documents(question, k=1)
    return best[0].page_content
def CRAG(question):
    """
    Corrective-RAG: retrieve a document for the question, grade it, and
    decide whether to answer from internal knowledge, external (web)
    knowledge, or a blend of both.

    Args:
        question (str): The question to be answered.

    Returns:
        str: The context text assembled for answering the question.
    """
    docs = get_doc(question)
    score = get_score(docs, question)
    if score >= 0.7:
        # Confident retrieval: use only the refined internal chunks.
        _, refined_doc = refine_doc(docs, question)
        # FIX: the original indexed refined_doc[0] and refined_doc[1]
        # unconditionally, raising IndexError when refine_doc returned
        # fewer than two chunks; join however many came back.
        return '\n\n'.join(chunk.page_content for chunk in refined_doc)
    elif 0.3 < score < 0.7:
        # Ambiguous retrieval: blend web knowledge with the best chunk.
        _, refined_doc = refine_doc(docs, question)
        external_knowledge = External_Knowledge(question)
        # BUG FIX: the original used refined_doc[score_refined_doc[-1]],
        # indexing the 2-element refined_doc list with an index into the
        # full chunk list (often out of range). refine_doc returns chunks
        # in ascending score order, so the best chunk is simply the last.
        return external_knowledge + '\n\n' + refined_doc[-1].page_content
    else:
        # Poor retrieval: fall back to web knowledge alone.
        return External_Knowledge(question)
# --- LLM Blender: pairwise ranking + fusion of candidate answers ---
def PairRanker(instruction, candidate1, candidate2):
    """
    Ask Gemini to compare two candidate answers to the same instruction.

    Args:
        instruction (str): The task both candidates responded to.
        candidate1 (str): Candidate answer A.
        candidate2 (str): Candidate answer B.

    Returns:
        str: The model's verdict, nominally one of "1. A is better",
        "2. B is better", "3. Same good", "4. Same bad".
    """
    # BUG FIX: the original prompt interpolated {input}, which embedded the
    # repr of Python's builtin input() function (there is no input argument),
    # and the "$" signs were leftovers of a non-Python template syntax.
    prompt = f'''
Instruction:
{instruction}
Candidate A:
{candidate1}
Candidate B:
{candidate2}
Given the instruction above, please compare the two candidates.
You only have 4 choices to output:
If you think A is better, please output: 1. A is better
If you think B is better, please output: 2. B is better
If you think both are good enough correctly give the answer, please output: 3. Same good
If you think both are bad and do not follow the instruction, please output: 4. Same bad
Do not output anything else except the 4 choices above.
Output your choice below:
'''
    model_pair_ranker = genai.GenerativeModel('gemini-pro')
    response = model_pair_ranker.generate_content(prompt, safety_settings=safety_settings)
    return response.text
def GenFuser(instruction, ranking, outputs, top_models=3):
    """
    Fuse the top-ranked candidate outputs into a single improved answer.

    Args:
        instruction (str): The original task.
        ranking (list): Candidate indices ordered by quality.
        outputs (list): Candidate outputs, indexable by entries of ranking.
        top_models (int, optional): How many candidates to fuse. Defaults to 3.

    Returns:
        str: The fused answer generated by Gemini.
    """
    # Assemble the fusion prompt from header, ranked outputs, and footer.
    parts = [f'''
Instruction:{instruction} \n
The outputs are in order of their quality and correctness. \n
''']
    for position in range(top_models):
        parts.append(f'''\n Output {position+1}: {outputs[ranking[position]]} \n''')
    parts.append(f'''\n Fuse the outputs above to generate a better output. \n''')
    fuser = genai.GenerativeModel('gemini-pro')
    response = fuser.generate_content(''.join(parts), safety_settings=safety_settings)
    return response.text
def ranking_matrix(instruction, candidates, top_models=3):
    """
    Build a pairwise comparison matrix for the first top_models candidates.

    matrix[i][j] is 1 if candidate i beat candidate j, -1 if it lost, and
    0 on a tie or an unrecognized ranker reply.

    Args:
        instruction (str): The task the candidates answered.
        candidates (list): Candidate outputs.
        top_models (int, optional): How many candidates to compare. Defaults to 3.

    Returns:
        list: A top_models x top_models list of lists of -1/0/1.
    """
    matrix = []
    for i in range(top_models):
        row = []
        for j in range(top_models):
            # Strip whitespace: model replies commonly carry a trailing newline.
            verdict = PairRanker(instruction, candidates[i], candidates[j]).strip()
            if verdict == "1. A is better":
                row.append(1)
            elif verdict == "2. B is better":
                row.append(-1)
            else:
                # Ties ("3. Same good" / "4. Same bad") and any unrecognized
                # reply count as a draw. BUG FIX: the original appended
                # nothing for unrecognized replies, producing ragged rows
                # that crashed rank_model with an IndexError.
                row.append(0)
        matrix.append(row)
    return matrix
def rank_model(instruction, candidates, top_models=3):
    """
    Rank candidate outputs by pairwise-comparison score, best first.

    Args:
        instruction (str): The task the candidates answered.
        candidates (list): Candidate outputs.
        top_models (int, optional): How many candidates to rank. Defaults to 3.

    Returns:
        list: Candidate indices sorted from best to worst.
    """
    matrix = ranking_matrix(instruction, candidates, top_models)
    # Net score per candidate: wins minus losses over both orientations.
    net_scores = [
        sum(matrix[i][j] - matrix[j][i] for j in range(top_models))
        for i in range(top_models)
    ]
    ascending = [idx for idx, _ in sorted(enumerate(net_scores), key=lambda pair: pair[1])]
    return ascending[::-1]
# --- Self Discovery: catalogue of 39 generic reasoning modules ---
# BUG FIX: the original list was missing commas after items 34-38, so
# Python's implicit string concatenation silently merged modules 34-39
# into a single list element.
REASONING_MODULES = [
    "1. How could I devise an experiment to help solve that problem?",
    "2. Make a list of ideas for solving this problem, and apply them one by one to the problem to see if any progress can be made.",
    "3. How could I measure progress on this problem?",
    "4. How can I simplify the problem so that it is easier to solve?",
    "5. What are the key assumptions underlying this problem?",
    "6. What are the potential risks and drawbacks of each solution?",
    "7. What are the alternative perspectives or viewpoints on this problem?",
    "8. What are the long-term implications of this problem and its solutions?",
    "9. How can I break down this problem into smaller, more manageable parts?",
    "10. Critical Thinking: This style involves analyzing the problem from different perspectives, questioning assumptions, and evaluating the evidence or information available. It focuses on logical reasoning, evidence-based decision-making, and identifying potential biases or flaws in thinking.",
    "11. Try creative thinking, generate innovative and out-of-the-box ideas to solve the problem. Explore unconventional solutions, thinking beyond traditional boundaries, and encouraging imagination and originality.",
    "12. Seek input and collaboration from others to solve the problem. Emphasize teamwork, open communication, and leveraging the diverse perspectives and expertise of a group to come up with effective solutions.",
    "13. Use systems thinking: Consider the problem as part of a larger system and understanding the interconnectedness of various elements. Focuses on identifying the underlying causes, feedback loops, and interdependencies that influence the problem, and developing holistic solutions that address the system as a whole.",
    "14. Use Risk Analysis: Evaluate potential risks, uncertainties, and tradeoffs associated with different solutions or approaches to a problem. Emphasize assessing the potential consequences and likelihood of success or failure, and making informed decisions based on a balanced analysis of risks and benefits.",
    "15. Use Reflective Thinking: Step back from the problem, take the time for introspection and self-reflection. Examine personal biases, assumptions, and mental models that may influence problem-solving, and being open to learning from past experiences to improve future approaches.",
    "16. What is the core issue or problem that needs to be addressed?",
    "17. What are the underlying causes or factors contributing to the problem?",
    "18. Are there any potential solutions or strategies that have been tried before? If yes, what were the outcomes and lessons learned?",
    "19. What are the potential obstacles or challenges that might arise in solving this problem?",
    "20. Are there any relevant data or information that can provide insights into the problem? If yes, what data sources are available, and how can they be analyzed?",
    "21. Are there any stakeholders or individuals who are directly affected by the problem? What are their perspectives and needs?",
    "22. What resources (financial, human, technological, etc.) are needed to tackle the problem effectively?",
    "23. How can progress or success in solving the problem be measured or evaluated?",
    "24. What indicators or metrics can be used?",
    "25. Is the problem a technical or practical one that requires a specific expertise or skill set? Or is it more of a conceptual or theoretical problem?",
    "26. Does the problem involve a physical constraint, such as limited resources, infrastructure, or space?",
    "27. Is the problem related to human behavior, such as a social, cultural, or psychological issue?",
    "28. Does the problem involve decision-making or planning, where choices need to be made under uncertainty or with competing objectives?",
    "29. Is the problem an analytical one that requires data analysis, modeling, or optimization techniques?",
    "30. Is the problem a design challenge that requires creative solutions and innovation?",
    "31. Does the problem require addressing systemic or structural issues rather than just individual instances?",
    "32. Is the problem time-sensitive or urgent, requiring immediate attention and action?",
    "33. What kinds of solution typically are produced for this kind of problem specification?",
    "34. Given the problem specification and the current best solution, have a guess about other possible solutions.",
    "35. Let’s imagine the current best solution is totally wrong, what other ways are there to think about the problem specification?",
    "36. What is the best way to modify this current best solution, given what you know about these kinds of problem specification?",
    "37. Ignoring the current best solution, create an entirely new solution to the problem.",
    "38. Let’s think step by step .",
    "39. Let’s make a step by step plan and implement it with good notation and explanation.",
]
# Flatten to one newline-separated string for direct prompt interpolation.
REASONING_MODULES = "\n".join(REASONING_MODULES)
def select_prompt(task):
    """Build the SELECT-stage prompt: which reasoning modules fit *task*."""
    return (
        f"Given the task: {task}, which of the following reasoning modules "
        f"are relevant? Do not elaborate on why.\n\n {REASONING_MODULES}"
    )
def adapt_prompt(task, selected_modules):
    """Build the ADAPT-stage prompt: specialize the selected modules to *task*."""
    header = "Without working out the full solution, adapt the following reasoning modules to be specific to our task:"
    return f"{header}\n{selected_modules}\n\nOur task:\n{task}"
def implement_prompt(task, adapted_modules):
    """Build the IMPLEMENT-stage prompt: turn adapted modules into a structure."""
    header = "Without working out the full solution, create an actionable reasoning structure for the task using these adapted reasoning modules:"
    return f"{header}\n{adapted_modules}\n\nTask Description:\n{task}"
def reasoning_prompt(task, reasoning_structure):
    """Build the final-answer prompt: solve *task* using the reasoning structure."""
    return (
        f"Using the following reasoning structure: {reasoning_structure}"
        f"\n\nSolve this task, providing your final answer: {task}"
    )
def SELECT(task):
    """SELECT stage: ask Gemini which reasoning modules fit the task."""
    selector = genai.GenerativeModel('gemini-pro')
    reply = selector.generate_content(select_prompt(task), safety_settings=safety_settings)
    return reply.text
def ADAPT(task, selected_modules):
    """ADAPT stage: specialize the selected reasoning modules to the task."""
    adapter = genai.GenerativeModel('gemini-pro')
    reply = adapter.generate_content(
        adapt_prompt(task, selected_modules), safety_settings=safety_settings
    )
    return reply.text
def IMPLEMENT(task, adapted_modules):
    """IMPLEMENT stage: build an actionable reasoning structure for the task."""
    implementor = genai.GenerativeModel('gemini-pro')
    reply = implementor.generate_content(
        implement_prompt(task, adapted_modules), safety_settings=safety_settings
    )
    return reply.text
def final_ans(model_final_ans, task, reasoning_structure):
    """Solve the task with the caller-supplied model and reasoning structure."""
    reply = model_final_ans.generate_content(
        reasoning_prompt(task, reasoning_structure), safety_settings=safety_settings
    )
    return reply.text
def SelfDiscovery(model_final_ans, task):
    """Run the Self-Discovery pipeline: SELECT -> ADAPT -> IMPLEMENT -> solve."""
    modules = SELECT(task)
    adapted = ADAPT(task, modules)
    structure = IMPLEMENT(task, adapted)
    return final_ans(model_final_ans, task, structure)
def Gemini_3x_blend(question):
    """
    Answer a question by blending three Gemini runs at different
    temperatures: each solves the task via Self-Discovery over the CRAG
    context, the answers are pair-ranked, and the top answers are fused.

    Args:
        question (str): The user question.

    Returns:
        str: The fused final answer.
    """
    context = CRAG(question)
    # Three instances of the same model at decreasing creativity.
    model_1 = genai.GenerativeModel(
        'gemini-pro', generation_config=genai.GenerationConfig(temperature=0.9)
    )  # highly creative
    model_2 = genai.GenerativeModel(
        'gemini-pro', generation_config=genai.GenerationConfig(temperature=0.5)
    )  # mildly creative
    model_3 = genai.GenerativeModel(
        'gemini-pro', generation_config=genai.GenerationConfig(temperature=0.1)
    )  # not at all creative
    prompt = f'''Context:{context} \n
Question:{question} \n
Based on the context answer the question. \n
'''
    # BUG FIX: the original built this prompt (and fetched the CRAG context)
    # but then passed only the bare question to SelfDiscovery, so the
    # retrieved context was never used. Pass the full prompt as the task.
    outputs = [SelfDiscovery(model, prompt) for model in (model_1, model_2, model_3)]
    ranking = rank_model(question, outputs)
    return GenFuser(question, ranking, outputs)
# --- Streamlit UI ---
# Single text input plus a button; the full pipeline runs only when the
# button is clicked AND the input is non-empty.
user_quest = st.text_input("Ask a technical question:")
btn = st.button("Ask")
if btn and user_quest:
    # Blocking call: CRAG retrieval + 3x Self-Discovery + rank + fuse.
    result = Gemini_3x_blend(user_quest)
    st.subheader("Response : ")
    st.text(result)