Spaces:

OmkarG
/

RAG_based_search

Sleeping

App Files Files Community

OmkarG committed on Dec 26, 2024

Commit

5518038

1 Parent(s): 680264e

added files

Browse files

Files changed (6) hide show

LLM_test.py +69 -0
RAG_test.py +96 -0
app.py +61 -4
attribute_data.json +156 -0
faiss_index/index.faiss +0 -0
faiss_index/index.pkl +0 -0

LLM_test.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import groq, os
+from groq import Groq
+from RAG_test import fetch
+client = Groq(
+    api_key="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",  # This is the default and can be omitted
+)
+# related_vectors = '''
+# attribute: spend, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Number"
+# attribute: clicks, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
+# attribute: impressions, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
+# '''
+query = "Show campaigns where spend is greater than 11 and labels include holiday"
+top_documents = fetch(query)
+USER_INPUT = "Show campaigns where spend is greater than 11 and labels include holiday and with impressions less than 500"
+def generate_chat_completion(client, SYSTEM_PROMPT, USER_INPUT, related_vectors):
+    SYSTEM_PROMPT = f'''
+    You are a system that converts natural language queries into a structured filter schema.
+    The filter schema consists of a list of conditions, each represented as:
+    {{
+        "attribute": "<attribute_name>",
+        "op": "<operator>",
+        "value": "<value>"
+    }}
+    There can be any number of conditions. You have to list them all.
+    Supported attributes and their operators are:
+    {related_vectors}
+    Example:
+    Input: "Show campaigns where spend is greater than 11"
+    Output: [{{"attribute": "spend", "op": ">", "value": 11}}]
+    Input: "Find ads with clicks less than 100 and impressions greater than 500"
+    Output: [
+        {{"attribute": "clicks", "op": "<", "value": 100}},
+        {{"attribute": "impressions", "op": ">", "value": 500}}
+    ]
+    STRICLY PROVIDE IN THE ABOVE JSON FORMAT WITHOUT ANY METADATA
+    '''
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "system",
+                "content": SYSTEM_PROMPT,
+            },
+            {
+                "role": "user",
+                "content": USER_INPUT,
+            },
+        ],
+        model="llama3-8b-8192",
+    )
+    return chat_completion.choices[0].message.content
+# print(generate_chat_completion(client, SYSTEM_PROMPT, USER_INPUT))

RAG_test.py ADDED Viewed

	@@ -0,0 +1,96 @@

+# from langchain.vectorstores import FAISS
+# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+# from langchain.schema import Document
+# import json
+# from pathlib import Path
+# from pprint import pprint
+# with open('Data.json', 'r') as file:
+#     json_data = json.load(file)
+# text_data = []
+# attribute_data = []  # Store extra data for operators
+# for message in json_data["messages"]:
+#     attribute = message["attribute"]
+#     operators = message["supported_operators"]  # Keep as a list
+#     value_type = "Number" if message["valueType"] == "Numeric" else message["valueType"]
+#     sentence = f'''attribute: {attribute}, value_type: {value_type}'''
+#     text_data.append(sentence)
+#     # Store attribute-to-operator mapping
+#     attribute_data.append({"attribute": attribute, "operators": operators})
+# # Create documents for FAISS
+# data = [Document(page_content=text) for text in text_data]
+# pprint(data)
+# db = FAISS.from_documents(data,
+#                           HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-MiniLM-L6-v2'))
+# # Connect query to FAISS index using a retriever
+# retriever = db.as_retriever(
+#     search_type="similarity",
+#     search_kwargs={"k": 5}
+# )
+# # Modify fetch function to include operators
+# def fetch(query):
+#     res = retriever.get_relevant_documents(query)
+#     docs = []
+#     for i in res:
+#         # Extract attribute from the document content
+#         attribute_line = i.page_content.split(",")[0]  # "attribute: X"
+#         attribute = attribute_line.split(": ")[1]  # Extract "X"
+#         # Find the matching operators from attribute_data
+#         operators = next((item["operators"] for item in attribute_data if item["attribute"] == attribute), [])
+#         # Format the operators as a list
+#         operators_list = f"operators: {operators}"
+#         # Append the content with operators
+#         docs.append(f"{i.page_content}, {operators_list}")
+#     return docs
+# query = "Show campaigns where spend is greater than 11 and labels include holiday"
+# top_documents = fetch(query)
+# pprint(top_documents)
+import json
+from langchain.vectorstores import FAISS
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+# Load the FAISS vector store from the directory
+db = FAISS.load_local(
+    "faiss_index",
+    HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-MiniLM-L6-v2'),
+    allow_dangerous_deserialization=True
+)
+attribute_data = json.load(open("attribute_data.json"))
+# Connect query to FAISS index using a retriever
+retriever = db.as_retriever(
+    search_type="similarity",
+    search_kwargs={"k": 5}
+)
+# Modify fetch function to include operators
+def fetch(query):
+    res = retriever.get_relevant_documents(query)
+    docs = []
+    for i in res:
+        attribute_line = i.page_content.split(",")[0]  # "attribute: X"
+        attribute = attribute_line.split(": ")[1]  # Extract "X"
+        # Find the matching operators from attribute_data
+        operators = next((item["operators"] for item in attribute_data if item["attribute"] == attribute), [])
+        operators_list = f"operators: {operators}"
+        docs.append(f"{i.page_content}, {operators_list}")
+    return docs

app.py CHANGED Viewed

@@ -1,7 +1,64 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+from LLM_test import generate_chat_completion
+from RAG_test import fetch
+from groq import Groq
+client = Groq(
+    api_key="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",  # This is the default and can be omitted
+)
+related_vectors = '''
+attribute: spend, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Number"
+attribute: clicks, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
+attribute: impressions, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
+'''
+SYSTEM_PROMPT = f'''
+You are a system that converts natural language queries into a structured filter schema.
+    The filter schema consists of a list of conditions, each represented as:
+    {{
+        "attribute": "<attribute_name>",
+        "op": "<operator>",
+        "value": "<value>"
+    }}
+    There can be any number of conditions. You have to list them all.
+    Supported attributes and their operators are:
+    {related_vectors}
+    Example:
+    Input: "Show campaigns where spend is greater than 11"
+    Output: [{{"attribute": "spend", "op": ">", "value": 11}}]
+    Input: "Find ads with clicks less than 100 and impressions greater than 500"
+    Output: [
+        {{"attribute": "clicks", "op": "<", "value": 100}},
+        {{"attribute": "impressions", "op": ">", "value": 500}}
+    ]
+    STRICLY PROVIDE IN THE ABOVE JSON FORMAT WITHOUT ANY METADATA
+'''
+# Define the Gradio interface
+def generate_chat_completion_interface(USER_INPUT):
+    top_documents = fetch(USER_INPUT)
+    related_vectors = "\n".join(top_documents)
+    result = generate_chat_completion(client, SYSTEM_PROMPT, USER_INPUT, related_vectors)
+    return result
+# Set up the Gradio app interface
+iface = gr.Interface(
+    fn=generate_chat_completion_interface,  # Function to run on input
+    inputs=gr.Textbox(label="Enter your sentence"),  # Input field
+    outputs=gr.Textbox(label="Generated Completion"),  # Output field
+    title="Chat Completion Generator",  # Title of the app
+    description="This app generates chat completions based on a user-provided input sentence."
+)
+# Launch the interface
+iface.launch()

attribute_data.json ADDED Viewed

	@@ -0,0 +1,156 @@

+[
+    {
+        "attribute": "spend",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "clicks",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "impressions",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "conversion_rate",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "cost_per_click",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "status",
+        "operators": [
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "labels",
+        "operators": [
+            "IN",
+            "NOT IN"
+        ]
+    },
+    {
+        "attribute": "campaign_name",
+        "operators": [
+            "CONTAINS",
+            "NOT CONTAINS",
+            "STARTS WITH",
+            "ENDS WITH"
+        ]
+    },
+    {
+        "attribute": "creation_date",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "modification_date",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "ad_type",
+        "operators": [
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "budget",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "sales",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "profit_margin",
+        "operators": [
+            ">",
+            "<",
+            ">=",
+            "<=",
+            "=",
+            "!="
+        ]
+    },
+    {
+        "attribute": "geo",
+        "operators": [
+            "IN",
+            "NOT IN"
+        ]
+    },
+    {
+        "attribute": "keyword_match",
+        "operators": [
+            "CONTAINS",
+            "NOT CONTAINS"
+        ]
+    }
+]

faiss_index/index.faiss ADDED Viewed

Binary file (24.6 kB). View file

faiss_index/index.pkl ADDED Viewed

Binary file (2.42 kB). View file