OmkarG committed on
Commit
5518038
·
1 Parent(s): 680264e

added files

Browse files
Files changed (6) hide show
  1. LLM_test.py +69 -0
  2. RAG_test.py +96 -0
  3. app.py +61 -4
  4. attribute_data.json +156 -0
  5. faiss_index/index.faiss +0 -0
  6. faiss_index/index.pkl +0 -0
LLM_test.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import groq, os
2
+ from groq import Groq
3
+ from RAG_test import fetch
4
+
5
# Read the API key from the environment so the secret never lives in source
# control. Groq() falls back to GROQ_API_KEY on its own; passing it explicitly
# keeps the dependency on that variable visible to readers.
# NOTE(review): any key that was ever committed to this repository should be
# treated as leaked and revoked.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
8
+
9
+ # related_vectors = '''
10
+ # attribute: spend, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Number"
11
+ # attribute: clicks, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
12
+ # attribute: impressions, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
13
+ # '''
14
+
15
+ query = "Show campaigns where spend is greater than 11 and labels include holiday"
16
+ top_documents = fetch(query)
17
+
18
+
19
+
20
+
21
+ USER_INPUT = "Show campaigns where spend is greater than 11 and labels include holiday and with impressions less than 500"
22
+
23
def generate_chat_completion(client, SYSTEM_PROMPT, USER_INPUT, related_vectors,
                             model="llama3-8b-8192"):
    """Convert a natural-language query into a JSON filter list via a chat model.

    Args:
        client: Object exposing ``chat.completions.create(messages=..., model=...)``
            (a ``groq.Groq`` client in this project).
        SYSTEM_PROMPT: Accepted for backward compatibility only. The system
            prompt is always rebuilt from ``related_vectors``, so this value
            is not used.
        USER_INPUT: The user's query; sent verbatim as the ``user`` message.
        related_vectors: Text describing the supported attributes and their
            operators; interpolated into the system prompt.
        model: Model identifier forwarded to the API. Defaults to the model
            this function has always used.

    Returns:
        The ``content`` of the first choice's message, returned as-is.
    """
    # Built under a separate local name so the (ignored) SYSTEM_PROMPT argument
    # is not silently rebound. Doubled braces are f-string escapes for the
    # literal braces of the JSON examples.
    system_prompt = f'''
You are a system that converts natural language queries into a structured filter schema.
The filter schema consists of a list of conditions, each represented as:
{{
"attribute": "<attribute_name>",
"op": "<operator>",
"value": "<value>"
}}
There can be any number of conditions. You have to list them all.

Supported attributes and their operators are:
{related_vectors}

Example:
Input: "Show campaigns where spend is greater than 11"
Output: [{{"attribute": "spend", "op": ">", "value": 11}}]

Input: "Find ads with clicks less than 100 and impressions greater than 500"
Output: [
{{"attribute": "clicks", "op": "<", "value": 100}},
{{"attribute": "impressions", "op": ">", "value": 500}}
]

STRICLY PROVIDE IN THE ABOVE JSON FORMAT WITHOUT ANY METADATA

'''

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": USER_INPUT,
            },
        ],
        model=model,
    )
    return chat_completion.choices[0].message.content
67
+
68
+
69
+ # print(generate_chat_completion(client, SYSTEM_PROMPT, USER_INPUT))
RAG_test.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from langchain.vectorstores import FAISS
2
+ # from langchain.embeddings.huggingface import HuggingFaceEmbeddings
3
+ # from langchain.schema import Document
4
+ # import json
5
+ # from pathlib import Path
6
+ # from pprint import pprint
7
+
8
+ # with open('Data.json', 'r') as file:
9
+ # json_data = json.load(file)
10
+
11
+ # text_data = []
12
+ # attribute_data = [] # Store extra data for operators
13
+
14
+ # for message in json_data["messages"]:
15
+ # attribute = message["attribute"]
16
+ # operators = message["supported_operators"] # Keep as a list
17
+ # value_type = "Number" if message["valueType"] == "Numeric" else message["valueType"]
18
+ # sentence = f'''attribute: {attribute}, value_type: {value_type}'''
19
+ # text_data.append(sentence)
20
+
21
+ # # Store attribute-to-operator mapping
22
+ # attribute_data.append({"attribute": attribute, "operators": operators})
23
+
24
+ # # Create documents for FAISS
25
+ # data = [Document(page_content=text) for text in text_data]
26
+
27
+ # pprint(data)
28
+
29
+
30
+ # db = FAISS.from_documents(data,
31
+ # HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-MiniLM-L6-v2'))
32
+
33
+ # # Connect query to FAISS index using a retriever
34
+ # retriever = db.as_retriever(
35
+ # search_type="similarity",
36
+ # search_kwargs={"k": 5}
37
+ # )
38
+
39
+ # # Modify fetch function to include operators
40
+ # def fetch(query):
41
+ # res = retriever.get_relevant_documents(query)
42
+ # docs = []
43
+ # for i in res:
44
+ # # Extract attribute from the document content
45
+ # attribute_line = i.page_content.split(",")[0] # "attribute: X"
46
+ # attribute = attribute_line.split(": ")[1] # Extract "X"
47
+
48
+ # # Find the matching operators from attribute_data
49
+ # operators = next((item["operators"] for item in attribute_data if item["attribute"] == attribute), [])
50
+
51
+ # # Format the operators as a list
52
+ # operators_list = f"operators: {operators}"
53
+
54
+ # # Append the content with operators
55
+ # docs.append(f"{i.page_content}, {operators_list}")
56
+ # return docs
57
+
58
+ # query = "Show campaigns where spend is greater than 11 and labels include holiday"
59
+ # top_documents = fetch(query)
60
+ # pprint(top_documents)
61
+
62
+
63
+
64
+ import json
65
+
66
+ from langchain.vectorstores import FAISS
67
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
68
+
69
# Load the FAISS vector store from the directory.
# NOTE(review): allow_dangerous_deserialization=True appears to let load_local
# unpickle the index's .pkl file; that is only safe while the faiss_index
# directory is a trusted artifact of this repository — confirm.
db = FAISS.load_local(
    "faiss_index",
    HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-MiniLM-L6-v2'),
    allow_dangerous_deserialization=True
)

# Attribute -> supported-operators records. A context manager is used so the
# file handle is closed deterministically instead of being left to the GC.
with open("attribute_data.json", encoding="utf-8") as attribute_file:
    attribute_data = json.load(attribute_file)

# Connect query to FAISS index using a retriever that returns the 5 most
# similar documents.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5}
)
83
+
84
+ # Modify fetch function to include operators
85
def fetch(query):
    """Retrieve the documents most similar to ``query``, annotated with operators.

    Each retrieved document's text is expected to start with
    ``"attribute: <name>, ..."``. The attribute name is looked up in the
    module-level ``attribute_data`` records and the matching operator list is
    appended to the document text.

    Args:
        query: Natural-language query passed to the module-level ``retriever``.

    Returns:
        A list of strings of the form ``"<page_content>, operators: <list>"``.
        Documents whose attribute cannot be parsed, or is not present in
        ``attribute_data``, get an empty operator list.
    """
    docs = []
    for document in retriever.get_relevant_documents(query):
        content = document.page_content
        # First comma-separated field is "attribute: X"; take the piece after
        # the first ": ". A document without that separator yields no
        # attribute instead of raising IndexError.
        pieces = content.split(",")[0].split(": ")
        attribute = pieces[1] if len(pieces) > 1 else None

        # Find the matching operators from attribute_data (first match wins).
        operators = next(
            (item["operators"] for item in attribute_data if item["attribute"] == attribute),
            [],
        )
        docs.append(f"{content}, operators: {operators}")
    return docs
app.py CHANGED
@@ -1,7 +1,64 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from LLM_test import generate_chat_completion
3
+ from RAG_test import fetch
4
+ from groq import Groq
5
 
6
# Do not embed the API key in source. With no api_key argument the Groq client
# reads it from the GROQ_API_KEY environment variable (the SDK's documented
# default), so the secret is supplied by the deployment environment instead.
# NOTE(review): any key that was ever committed to this repository should be
# treated as leaked and revoked.
client = Groq()
9
 
10
+ related_vectors = '''
11
+ attribute: spend, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Number"
12
+ attribute: clicks, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
13
+ attribute: impressions, operators_supported: [">", "<", ">=", "<=", "=", "!="], value_type: "Integer"
14
+ '''
15
+
16
+
17
+ SYSTEM_PROMPT = f'''
18
+ You are a system that converts natural language queries into a structured filter schema.
19
+ The filter schema consists of a list of conditions, each represented as:
20
+ {{
21
+ "attribute": "<attribute_name>",
22
+ "op": "<operator>",
23
+ "value": "<value>"
24
+ }}
25
+ There can be any number of conditions. You have to list them all.
26
+
27
+ Supported attributes and their operators are:
28
+ {related_vectors}
29
+
30
+ Example:
31
+ Input: "Show campaigns where spend is greater than 11"
32
+ Output: [{{"attribute": "spend", "op": ">", "value": 11}}]
33
+
34
+ Input: "Find ads with clicks less than 100 and impressions greater than 500"
35
+ Output: [
36
+ {{"attribute": "clicks", "op": "<", "value": 100}},
37
+ {{"attribute": "impressions", "op": ">", "value": 500}}
38
+ ]
39
+
40
+ STRICLY PROVIDE IN THE ABOVE JSON FORMAT WITHOUT ANY METADATA
41
+
42
+ '''
43
+
44
+ # Define the Gradio interface
45
def generate_chat_completion_interface(USER_INPUT):
    """Gradio callback: retrieve context for the input and run the completion.

    The strings returned by ``fetch`` are joined with newlines and passed,
    together with the module-level ``client`` and ``SYSTEM_PROMPT``, to
    ``generate_chat_completion``; its result is returned unchanged.
    """
    retrieved_context = "\n".join(fetch(USER_INPUT))
    return generate_chat_completion(client, SYSTEM_PROMPT, USER_INPUT, retrieved_context)
53
+
54
+ # Set up the Gradio app interface
55
+ iface = gr.Interface(
56
+ fn=generate_chat_completion_interface, # Function to run on input
57
+ inputs=gr.Textbox(label="Enter your sentence"), # Input field
58
+ outputs=gr.Textbox(label="Generated Completion"), # Output field
59
+ title="Chat Completion Generator", # Title of the app
60
+ description="This app generates chat completions based on a user-provided input sentence."
61
+ )
62
+
63
+ # Launch the interface
64
+ iface.launch()
attribute_data.json ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "attribute": "spend",
4
+ "operators": [
5
+ ">",
6
+ "<",
7
+ ">=",
8
+ "<=",
9
+ "=",
10
+ "!="
11
+ ]
12
+ },
13
+ {
14
+ "attribute": "clicks",
15
+ "operators": [
16
+ ">",
17
+ "<",
18
+ ">=",
19
+ "<=",
20
+ "=",
21
+ "!="
22
+ ]
23
+ },
24
+ {
25
+ "attribute": "impressions",
26
+ "operators": [
27
+ ">",
28
+ "<",
29
+ ">=",
30
+ "<=",
31
+ "=",
32
+ "!="
33
+ ]
34
+ },
35
+ {
36
+ "attribute": "conversion_rate",
37
+ "operators": [
38
+ ">",
39
+ "<",
40
+ ">=",
41
+ "<=",
42
+ "=",
43
+ "!="
44
+ ]
45
+ },
46
+ {
47
+ "attribute": "cost_per_click",
48
+ "operators": [
49
+ ">",
50
+ "<",
51
+ ">=",
52
+ "<=",
53
+ "=",
54
+ "!="
55
+ ]
56
+ },
57
+ {
58
+ "attribute": "status",
59
+ "operators": [
60
+ "=",
61
+ "!="
62
+ ]
63
+ },
64
+ {
65
+ "attribute": "labels",
66
+ "operators": [
67
+ "IN",
68
+ "NOT IN"
69
+ ]
70
+ },
71
+ {
72
+ "attribute": "campaign_name",
73
+ "operators": [
74
+ "CONTAINS",
75
+ "NOT CONTAINS",
76
+ "STARTS WITH",
77
+ "ENDS WITH"
78
+ ]
79
+ },
80
+ {
81
+ "attribute": "creation_date",
82
+ "operators": [
83
+ ">",
84
+ "<",
85
+ ">=",
86
+ "<=",
87
+ "=",
88
+ "!="
89
+ ]
90
+ },
91
+ {
92
+ "attribute": "modification_date",
93
+ "operators": [
94
+ ">",
95
+ "<",
96
+ ">=",
97
+ "<=",
98
+ "=",
99
+ "!="
100
+ ]
101
+ },
102
+ {
103
+ "attribute": "ad_type",
104
+ "operators": [
105
+ "=",
106
+ "!="
107
+ ]
108
+ },
109
+ {
110
+ "attribute": "budget",
111
+ "operators": [
112
+ ">",
113
+ "<",
114
+ ">=",
115
+ "<=",
116
+ "=",
117
+ "!="
118
+ ]
119
+ },
120
+ {
121
+ "attribute": "sales",
122
+ "operators": [
123
+ ">",
124
+ "<",
125
+ ">=",
126
+ "<=",
127
+ "=",
128
+ "!="
129
+ ]
130
+ },
131
+ {
132
+ "attribute": "profit_margin",
133
+ "operators": [
134
+ ">",
135
+ "<",
136
+ ">=",
137
+ "<=",
138
+ "=",
139
+ "!="
140
+ ]
141
+ },
142
+ {
143
+ "attribute": "geo",
144
+ "operators": [
145
+ "IN",
146
+ "NOT IN"
147
+ ]
148
+ },
149
+ {
150
+ "attribute": "keyword_match",
151
+ "operators": [
152
+ "CONTAINS",
153
+ "NOT CONTAINS"
154
+ ]
155
+ }
156
+ ]
faiss_index/index.faiss ADDED
Binary file (24.6 kB). View file
 
faiss_index/index.pkl ADDED
Binary file (2.42 kB). View file