Spaces:
Sleeping
Sleeping
updated Groq API key
Browse files- app.py +7 -46
- LLM_test.py → groq_helper.py +6 -19
- retrieval_helper.py +3 -66
app.py
CHANGED
@@ -1,63 +1,24 @@
|
|
1 |
import gradio as gr
|
2 |
import json
|
3 |
-
from
|
4 |
from retrieval_helper import fetch
|
5 |
from groq import Groq
|
6 |
|
7 |
-
client = Groq(
|
8 |
-
api_key="[REDACTED — leaked Groq API key; removing it in a later commit does NOT un-leak it: revoke/rotate this key in the Groq console immediately]",
|
9 |
-
)
|
10 |
-
|
11 |
-
# related_vectors = '''
|
12 |
-
# attribute: spend, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Number"
|
13 |
-
# attribute: clicks, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
|
14 |
-
# attribute: impressions, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
|
15 |
-
# '''
|
16 |
-
|
17 |
-
|
18 |
-
# SYSTEM_PROMPT = f'''
|
19 |
-
# You are a system that converts natural language queries into a structured filter schema.
|
20 |
-
# The filter schema consists of a list of conditions, each represented as:
|
21 |
-
# {{
|
22 |
-
# "attribute": "<attribute_name>",
|
23 |
-
# "op": "<operator>",
|
24 |
-
# "value": "<value>"
|
25 |
-
# }}
|
26 |
-
# There can be any number of conditions. You have to list them all.
|
27 |
-
|
28 |
-
# Supported attributes and their operators are:
|
29 |
-
# {related_vectors}
|
30 |
-
|
31 |
-
# Example:
|
32 |
-
# Input: "Show campaigns where spend is greater than 11"
|
33 |
-
# Output: [{{"attribute": "spend", "op": ">", "value": 11}}]
|
34 |
-
|
35 |
-
# Input: "Find ads with clicks less than 100 and impressions greater than 500"
|
36 |
-
# Output: [
|
37 |
-
# {{"attribute": "clicks", "op": "<", "value": 100}},
|
38 |
-
# {{"attribute": "impressions", "op": ">", "value": 500}}
|
39 |
-
# ]
|
40 |
-
|
41 |
-
# STRICTLY PROVIDE IN THE ABOVE JSON FORMAT WITHOUT ANY METADATA
|
42 |
-
|
43 |
-
# '''
|
44 |
-
|
45 |
-
# Define the Gradio interface
|
46 |
def generate_chat_completion_interface(USER_INPUT):
|
47 |
|
48 |
top_documents = fetch(USER_INPUT)
|
49 |
related_vectors = "\n".join(top_documents)
|
50 |
|
51 |
-
result = generate_chat_completion(
|
52 |
|
53 |
return result
|
54 |
|
55 |
-
#
|
56 |
iface = gr.Interface(
|
57 |
-
fn=generate_chat_completion_interface,
|
58 |
-
inputs=gr.Textbox(label="Enter your query"),
|
59 |
-
outputs=gr.Textbox(label="Generated JSON"),
|
60 |
-
title="RAG based search",
|
61 |
description="Provide your natural language search query"
|
62 |
)
|
63 |
|
|
|
1 |
import gradio as gr
|
2 |
import json
|
3 |
+
from groq_helper import generate_chat_completion
|
4 |
from retrieval_helper import fetch
|
5 |
from groq import Groq
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
def generate_chat_completion_interface(USER_INPUT):
|
8 |
|
9 |
top_documents = fetch(USER_INPUT)
|
10 |
related_vectors = "\n".join(top_documents)
|
11 |
|
12 |
+
result = generate_chat_completion(USER_INPUT, related_vectors)
|
13 |
|
14 |
return result
|
15 |
|
16 |
+
# Gradio app interface
|
17 |
iface = gr.Interface(
|
18 |
+
fn=generate_chat_completion_interface,
|
19 |
+
inputs=gr.Textbox(label="Enter your query"),
|
20 |
+
outputs=gr.Textbox(label="Generated JSON"),
|
21 |
+
title="RAG based search",
|
22 |
description="Provide your natural language search query"
|
23 |
)
|
24 |
|
LLM_test.py → groq_helper.py
RENAMED
@@ -1,25 +1,11 @@
|
|
1 |
-
import
|
2 |
from groq import Groq
|
3 |
-
from retrieval_helper import fetch
|
4 |
|
5 |
client = Groq(
|
6 |
-
api_key=
|
7 |
)
|
8 |
|
9 |
-
|
10 |
-
# attribute: spend, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Number"
|
11 |
-
# attribute: clicks, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
|
12 |
-
# attribute: impressions, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
|
13 |
-
# '''
|
14 |
-
|
15 |
-
query = "Show campaigns where spend is greater than 11 and labels include holiday"
|
16 |
-
top_documents = fetch(query)
|
17 |
-
|
18 |
-
|
19 |
-
USER_INPUT = "Show campaigns where spend is greater than 11 and labels include holiday and with impressions less than 500"
|
20 |
-
|
21 |
-
def generate_chat_completion(client, USER_INPUT, related_vectors):
|
22 |
-
|
23 |
|
24 |
SYSTEM_PROMPT = f'''
|
25 |
You are a system that converts natural language queries into a structured filter schema.
|
@@ -63,5 +49,6 @@ def generate_chat_completion(client, USER_INPUT, related_vectors):
|
|
63 |
)
|
64 |
return chat_completion.choices[0].message.content
|
65 |
|
66 |
-
|
67 |
-
#
|
|
|
|
1 |
+
import os
|
2 |
from groq import Groq
|
|
|
3 |
|
4 |
client = Groq(
|
5 |
+
api_key=os.getenv('GROQ_API_KEY'),
|
6 |
)
|
7 |
|
8 |
+
def generate_chat_completion(USER_INPUT, related_vectors):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
SYSTEM_PROMPT = f'''
|
11 |
You are a system that converts natural language queries into a structured filter schema.
|
|
|
49 |
)
|
50 |
return chat_completion.choices[0].message.content
|
51 |
|
52 |
+
# Test input
|
53 |
+
# USER_INPUT = "Show campaigns where spend is greater than 11 and labels include holiday and with impressions less than 500"
|
54 |
+
# print(generate_chat_completion(SYSTEM_PROMPT, USER_INPUT))
|
retrieval_helper.py
CHANGED
@@ -1,79 +1,16 @@
|
|
1 |
-
# from langchain.vectorstores import FAISS
|
2 |
-
# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
3 |
-
# from langchain.schema import Document
|
4 |
-
# import json
|
5 |
-
# from pathlib import Path
|
6 |
-
# from pprint import pprint
|
7 |
-
|
8 |
-
# with open('Data.json', 'r') as file:
|
9 |
-
# json_data = json.load(file)
|
10 |
-
|
11 |
-
# text_data = []
|
12 |
-
# attribute_data = [] # Store extra data for operators
|
13 |
-
|
14 |
-
# for message in json_data["messages"]:
|
15 |
-
# attribute = message["attribute"]
|
16 |
-
# operators = message["supported_operators"] # Keep as a list
|
17 |
-
# value_type = "Number" if message["valueType"] == "Numeric" else message["valueType"]
|
18 |
-
# sentence = f'''attribute: {attribute}, value_type: {value_type}'''
|
19 |
-
# text_data.append(sentence)
|
20 |
-
|
21 |
-
# # Store attribute-to-operator mapping
|
22 |
-
# attribute_data.append({"attribute": attribute, "operators": operators})
|
23 |
-
|
24 |
-
# # Create documents for FAISS
|
25 |
-
# data = [Document(page_content=text) for text in text_data]
|
26 |
-
|
27 |
-
# pprint(data)
|
28 |
-
|
29 |
-
|
30 |
-
# db = FAISS.from_documents(data,
|
31 |
-
# HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-MiniLM-L6-v2'))
|
32 |
-
|
33 |
-
# # Connect query to FAISS index using a retriever
|
34 |
-
# retriever = db.as_retriever(
|
35 |
-
# search_type="similarity",
|
36 |
-
# search_kwargs={"k": 5}
|
37 |
-
# )
|
38 |
-
|
39 |
-
# # Modify fetch function to include operators
|
40 |
-
# def fetch(query):
|
41 |
-
# res = retriever.get_relevant_documents(query)
|
42 |
-
# docs = []
|
43 |
-
# for i in res:
|
44 |
-
# # Extract attribute from the document content
|
45 |
-
# attribute_line = i.page_content.split(",")[0] # "attribute: X"
|
46 |
-
# attribute = attribute_line.split(": ")[1] # Extract "X"
|
47 |
-
|
48 |
-
# # Find the matching operators from attribute_data
|
49 |
-
# operators = next((item["operators"] for item in attribute_data if item["attribute"] == attribute), [])
|
50 |
-
|
51 |
-
# # Format the operators as a list
|
52 |
-
# operators_list = f"operators: {operators}"
|
53 |
-
|
54 |
-
# # Append the content with operators
|
55 |
-
# docs.append(f"{i.page_content}, {operators_list}")
|
56 |
-
# return docs
|
57 |
-
|
58 |
-
# query = "Show campaigns where spend is greater than 11 and labels include holiday"
|
59 |
-
# top_documents = fetch(query)
|
60 |
-
# pprint(top_documents)
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
import json
|
65 |
-
|
66 |
from langchain.vectorstores import FAISS
|
67 |
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
68 |
|
69 |
-
# Load the FAISS vector store from the directory
|
70 |
db = FAISS.load_local(
|
71 |
"faiss_index",
|
72 |
HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-MiniLM-L6-v2'),
|
73 |
allow_dangerous_deserialization=True
|
74 |
)
|
75 |
|
76 |
-
|
|
|
77 |
|
78 |
# Connect query to FAISS index using a retriever
|
79 |
retriever = db.as_retriever(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import json
|
|
|
2 |
from langchain.vectorstores import FAISS
|
3 |
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
4 |
|
5 |
+
# Load the FAISS vector store from the directory 'faiss_index'
|
6 |
db = FAISS.load_local(
|
7 |
"faiss_index",
|
8 |
HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-MiniLM-L6-v2'),
|
9 |
allow_dangerous_deserialization=True
|
10 |
)
|
11 |
|
12 |
+
# Load attributes along with their supported operators
|
13 |
+
attribute_data = json.load(open("attribute_data.json"))
|
14 |
|
15 |
# Connect query to FAISS index using a retriever
|
16 |
retriever = db.as_retriever(
|