Umang-Bansal committed on
Commit
4f30460
·
verified ·
1 Parent(s): 3104839

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +175 -162
functions.py CHANGED
@@ -1,162 +1,175 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import time
4
- from typing import List, Dict
5
- from serpapi import GoogleSearch
6
- from langchain_groq import ChatGroq
7
- from langchain.prompts import PromptTemplate
8
- import gspread
9
- from google.oauth2.service_account import Credentials
10
- import pandas as pd
11
- import os
12
-
13
- def get_sheet_client():
14
- """Helper function to create authenticated Google Sheets client"""
15
- try:
16
- scope = ["https://www.googleapis.com/auth/spreadsheets"]
17
- creds = Credentials.from_service_account_file("credentials.json", scopes=scope)
18
- client = gspread.authorize(creds)
19
-
20
- # Get service account email for error messages
21
- service_account_email = creds.service_account_email
22
- st.session_state['service_account_email'] = service_account_email
23
-
24
- return client
25
- except FileNotFoundError:
26
- raise ValueError(
27
- "credentials.json file not found. Please ensure it exists in the project directory."
28
- )
29
- except Exception as e:
30
- raise ValueError(f"Error setting up Google Sheets client: {str(e)}")
31
-
32
- def get_worksheet(sheet_id: str, range_name: str = None):
33
- """Helper function to get worksheet with improved error handling"""
34
- try:
35
- client = get_sheet_client()
36
- sheet = client.open_by_key(sheet_id)
37
- return sheet.worksheet(range_name) if range_name else sheet
38
- except gspread.exceptions.SpreadsheetNotFound:
39
- service_email = st.session_state.get('service_account_email', 'the service account')
40
- raise ValueError(
41
- f"Spreadsheet not found. Please verify:\n"
42
- f"1. The spreadsheet ID is correct\n"
43
- f"2. The sheet is shared with {service_email}\n"
44
- f"3. Sharing permissions allow edit access"
45
- )
46
- except gspread.exceptions.WorksheetNotFound:
47
- raise ValueError(f"Worksheet '{range_name}' not found in the spreadsheet")
48
- except gspread.exceptions.APIError as e:
49
- if 'PERMISSION_DENIED' in str(e):
50
- service_email = st.session_state.get('service_account_email', 'the service account')
51
- raise ValueError(
52
- f"Permission denied. Please share the spreadsheet with {service_email} "
53
- f"and ensure it has edit access."
54
- )
55
- raise ValueError(f"Google Sheets API error: {str(e)}")
56
-
57
- def process_queries(df: pd.DataFrame, primary_column: str, query_template: str) -> List[Dict]:
58
- results = []
59
-
60
- serpapi_key = os.getenv("SERPAPI_API_KEY")
61
- for index, row in df.iterrows():
62
- try:
63
- value = row[primary_column]
64
- query = query_template.replace(f"{{{primary_column}}}", str(value))
65
-
66
- # Perform search
67
- search = GoogleSearch({
68
- "q": query,
69
- "gl": "in",
70
- "api_key": serpapi_key,
71
- "num": 5
72
- })
73
- search_results = search.get_dict()
74
-
75
- # Store results
76
- results.append({
77
- primary_column: value,
78
- "query": query,
79
- "search_results": search_results.get("organic_results", [])
80
- })
81
-
82
- # Rate limiting
83
- time.sleep(1)
84
-
85
-
86
- if index % 10 == 0:
87
- st.write(f"Processed {index + 1} queries...")
88
-
89
- except Exception as e:
90
- st.warning(f"Error processing query for {value}: {str(e)}")
91
- continue
92
-
93
- return results
94
-
95
- def setup_llm():
96
- """Setup LangChain with Groq"""
97
- api_key=os.getenv("GROQ_API_KEY")
98
- llm = ChatGroq(
99
- api_key=api_key,
100
- model="llama-3.1-8b-instant",
101
- temperature=0,
102
- max_tokens=None,
103
- timeout=None,
104
- max_retries=2,
105
- )
106
- return llm
107
-
108
- def process_with_ai(search_results: dict, query: str, llm) -> str:
109
- template = """
110
- Extract ONLY the specific information requested from the search results for: {query}
111
-
112
- Search Results:
113
- {search_results}
114
-
115
- Provide ONLY the extracted information as a simple text response.
116
- If multiple items exist, separate them with semicolons.
117
- If no relevant information is found, respond with "Not found".
118
-
119
- For example:
120
- - If asked for locations: "Bengaluru; Mumbai; Delhi"
121
- - If asked for email: "[email protected]"
122
- - If asked for address: "123 Main Street, City, Country"
123
- """
124
-
125
- prompt = PromptTemplate(
126
- input_variables=["query", "search_results"],
127
- template=template
128
- )
129
-
130
- chain = prompt | llm
131
- response = chain.invoke({"query": query, "search_results": search_results})
132
-
133
- return response
134
-
135
-
136
- def load_google_sheet(sheet_id: str, range_name: str) -> pd.DataFrame:
137
- worksheet = get_worksheet(sheet_id,range_name)
138
- data = worksheet.get_all_records()
139
- return pd.DataFrame(data)
140
-
141
-
142
- def write_to_google_sheet(sheet_id: str, range_name: str, results_df: pd.DataFrame):
143
-
144
- worksheet = get_worksheet(sheet_id, range_name)
145
-
146
- all_values = worksheet.get_all_values()
147
- num_rows = len(all_values)
148
- next_col_num = len(all_values[0]) + 1
149
- next_col_letter = chr(64 + next_col_num)
150
-
151
- range = f'{next_col_letter}1:{next_col_letter}{num_rows}'
152
-
153
- values = [['AI Results']] + [[str(result)] for result in results_df['result']]
154
-
155
- worksheet.update(values, f'{range}')
156
-
157
-
158
- def get_all_sheet_names(sheet_id: str) -> List[str]:
159
-
160
- worksheet = get_worksheet(sheet_id)
161
- sheets = map(lambda x: x.title, worksheet.worksheets())
162
- return list(sheets)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import time
4
+ from typing import List, Dict
5
+ from serpapi import GoogleSearch
6
+ from langchain_groq import ChatGroq
7
+ from langchain.prompts import PromptTemplate
8
+ import gspread
9
+ from google.oauth2.service_account import Credentials
10
+ import pandas as pd
11
+ import os
12
+
13
# Service-account fields, each read from the environment variable of the same
# name. A variable that is not set yields None for that key.
_CREDENTIAL_FIELDS = (
    "type",
    "project_id",
    "private_key_id",
    "private_key",
    "client_email",
    "client_id",
    "client_x509_cert_url",
    "universe_domain",
    "auth_uri",
    "token_uri",
    "auth_provider_x509_cert_url",
)
credentials = {field: os.getenv(field) for field in _CREDENTIAL_FIELDS}
26
def get_sheet_client():
    """Create an authenticated gspread client from the service-account settings.

    The service-account fields come from the module-level ``credentials``
    mapping (filled from environment variables). As a side effect, the
    service-account e-mail is stored in ``st.session_state`` under the key
    ``'service_account_email'`` so other helpers can quote it in error
    messages.

    Returns:
        The client object produced by ``gspread.authorize``.

    Raises:
        ValueError: If required settings are missing or the client cannot be
            created for any other reason.
    """
    scope = ["https://www.googleapis.com/auth/spreadsheets"]

    # Work on a copy so the module-level mapping is never mutated.
    info = dict(credentials)

    # Fail early with a readable message instead of an opaque library error
    # when the environment is not configured.
    missing = [
        key for key in ("client_email", "private_key", "token_uri")
        if not info.get(key)
    ]
    if missing:
        raise ValueError(
            "Error setting up Google Sheets client: missing environment "
            f"variable(s): {', '.join(missing)}"
        )

    # Keys stored in environment variables frequently carry literal "\n"
    # sequences instead of real line breaks; a PEM key never contains a
    # backslash, so this normalisation is a no-op for well-formed values.
    info["private_key"] = info["private_key"].replace("\\n", "\n")

    try:
        # The scopes must be passed explicitly; without them the credentials
        # are not authorised for the Sheets API.
        creds = Credentials.from_service_account_info(info, scopes=scope)
        client = gspread.authorize(creds)

        # Remember the service account email for error messages elsewhere.
        st.session_state['service_account_email'] = creds.service_account_email

        return client
    except Exception as e:
        raise ValueError(f"Error setting up Google Sheets client: {str(e)}") from e
44
+
45
def get_worksheet(sheet_id: str, range_name: str = None):
    """Open a spreadsheet by key and, optionally, one worksheet inside it.

    Args:
        sheet_id: Key handed to ``open_by_key`` on the authenticated client.
        range_name: Worksheet title. When falsy, the spreadsheet object
            itself is returned instead of a worksheet.

    Returns:
        The worksheet named ``range_name``, or the spreadsheet when no name
        is given.

    Raises:
        ValueError: When the spreadsheet or worksheet is not found, or when
            the Sheets API reports an error (with a dedicated message for
            permission problems).
    """
    try:
        spreadsheet = get_sheet_client().open_by_key(sheet_id)
        if not range_name:
            return spreadsheet
        return spreadsheet.worksheet(range_name)
    except gspread.exceptions.SpreadsheetNotFound:
        account = st.session_state.get('service_account_email', 'the service account')
        checklist = (
            "Spreadsheet not found. Please verify:",
            "1. The spreadsheet ID is correct",
            f"2. The sheet is shared with {account}",
            "3. Sharing permissions allow edit access",
        )
        raise ValueError("\n".join(checklist))
    except gspread.exceptions.WorksheetNotFound:
        raise ValueError(f"Worksheet '{range_name}' not found in the spreadsheet")
    except gspread.exceptions.APIError as e:
        details = str(e)
        # Anything other than a permission problem is reported verbatim.
        if 'PERMISSION_DENIED' not in details:
            raise ValueError(f"Google Sheets API error: {details}")
        account = st.session_state.get('service_account_email', 'the service account')
        raise ValueError(
            f"Permission denied. Please share the spreadsheet with {account} "
            "and ensure it has edit access."
        )
69
+
70
def process_queries(df: pd.DataFrame, primary_column: str, query_template: str) -> List[Dict]:
    """Run one SerpApi Google search per DataFrame row and collect the results.

    For every row, the ``{primary_column}`` placeholder in ``query_template``
    is replaced with the row's value from ``primary_column`` and the
    resulting query is sent through ``GoogleSearch``. A failure on one row
    is reported with ``st.warning`` and the row is skipped; the remaining
    rows are still processed.

    Args:
        df: Input rows.
        primary_column: Column whose value is substituted into the template.
        query_template: Query text containing a ``{<primary_column>}``
            placeholder.

    Returns:
        One dict per successfully processed row with the keys
        ``primary_column`` (the row value), ``"query"`` and
        ``"search_results"`` (the ``organic_results`` list of the response,
        or ``[]`` when that key is absent).
    """
    results = []

    serpapi_key = os.getenv("SERPAPI_API_KEY")
    placeholder = f"{{{primary_column}}}"

    # Count rows by position: the labels yielded by iterrows() are index
    # labels, which may be strings, dates or non-contiguous integers and are
    # therefore unsuitable for the progress arithmetic below.
    for position, (_, row) in enumerate(df.iterrows()):
        # Bound before the try block so the warning below can always refer
        # to it, even when reading the column itself is what failed.
        value = None
        try:
            value = row[primary_column]
            query = query_template.replace(placeholder, str(value))

            # Perform search
            search = GoogleSearch({
                "q": query,
                "gl": "in",
                "api_key": serpapi_key,
                "num": 5
            })
            search_results = search.get_dict()

            # Store results
            results.append({
                primary_column: value,
                "query": query,
                "search_results": search_results.get("organic_results", [])
            })

            # Rate limiting
            time.sleep(1)

            # Progress note on the first row and every tenth row after it.
            if position % 10 == 0:
                st.write(f"Processed {position + 1} queries...")

        except Exception as e:
            # Deliberately best-effort: one bad row must not abort the batch.
            st.warning(f"Error processing query for {value}: {str(e)}")
            continue

    return results
107
+
108
def setup_llm():
    """Build the Groq chat model used by the LangChain pipeline.

    The API key is read from the ``GROQ_API_KEY`` environment variable.

    Returns:
        A ``ChatGroq`` instance configured for ``llama-3.1-8b-instant`` with
        temperature 0, no token or timeout limit, and up to two retries.
    """
    return ChatGroq(
        api_key=os.getenv("GROQ_API_KEY"),
        model="llama-3.1-8b-instant",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
120
+
121
def process_with_ai(search_results: dict, query: str, llm) -> str:
    """Ask the language model to extract the requested facts from search results.

    A prompt is built from ``query`` and ``search_results`` and piped into
    ``llm``; whatever ``chain.invoke`` yields is returned unchanged.

    Args:
        search_results: Search results inserted into the prompt.
        query: Description of the information to extract.
        llm: Model object that can be composed with a ``PromptTemplate``
            through the ``|`` operator.

    Returns:
        The result of invoking the prompt/model chain.
        NOTE(review): the ``-> str`` annotation may not match the object the
        chain actually returns -- confirm against the caller.
    """
    # The prompt text is assembled line by line; the resulting string is the
    # same one the model has always been given.
    template = (
        "\n"
        "Extract ONLY the specific information requested from the search results for: {query}\n"
        "\n"
        "Search Results:\n"
        "{search_results}\n"
        "\n"
        "Provide ONLY the extracted information as a simple text response.\n"
        "If multiple items exist, separate them with semicolons.\n"
        'If no relevant information is found, respond with "Not found".\n'
        "\n"
        "For example:\n"
        '- If asked for locations: "Bengaluru; Mumbai; Delhi"\n'
        '- If asked for email: "[email protected]"\n'
        '- If asked for address: "123 Main Street, City, Country"\n'
    )

    extraction_prompt = PromptTemplate(
        input_variables=["query", "search_results"],
        template=template,
    )
    pipeline = extraction_prompt | llm

    return pipeline.invoke({"query": query, "search_results": search_results})
147
+
148
+
149
def load_google_sheet(sheet_id: str, range_name: str) -> pd.DataFrame:
    """Load every record of one worksheet into a DataFrame.

    Args:
        sheet_id: Spreadsheet key.
        range_name: Title of the worksheet to read.

    Returns:
        A DataFrame built from the worksheet's ``get_all_records()`` output.
    """
    records = get_worksheet(sheet_id, range_name).get_all_records()
    return pd.DataFrame(records)
153
+
154
+
155
def write_to_google_sheet(sheet_id: str, range_name: str, results_df: pd.DataFrame):
    """Write the results into the first unused column of a worksheet.

    The new column gets the header ``AI Results`` followed by the string
    form of every entry in ``results_df['result']``.

    Args:
        sheet_id: Spreadsheet key.
        range_name: Title of the worksheet to update.
        results_df: DataFrame providing a ``result`` column.
    """
    worksheet = get_worksheet(sheet_id, range_name)

    all_values = worksheet.get_all_values()
    # Widest existing row; 0 for a sheet without any values, so an empty
    # sheet is written starting at column A instead of raising IndexError.
    used_cols = max((len(existing_row) for existing_row in all_values), default=0)
    next_col_num = used_cols + 1

    values = [['AI Results']] + [[str(result)] for result in results_df['result']]

    # The new column lies one past the used area; widen the grid first when
    # that position does not exist yet, otherwise the update is rejected.
    if next_col_num > worksheet.col_count:
        worksheet.add_cols(next_col_num - worksheet.col_count)

    # rowcol_to_a1 yields correct labels beyond column Z (AA, AB, ...),
    # which plain chr() arithmetic cannot. The range is sized from the
    # values actually written rather than from the existing row count.
    first_cell = gspread.utils.rowcol_to_a1(1, next_col_num)
    last_cell = gspread.utils.rowcol_to_a1(len(values), next_col_num)
    target_range = f'{first_cell}:{last_cell}'

    worksheet.update(values, target_range)
169
+
170
+
171
def get_all_sheet_names(sheet_id: str) -> List[str]:
    """List the titles of all worksheets in a spreadsheet.

    Args:
        sheet_id: Spreadsheet key.

    Returns:
        The ``title`` of every worksheet, in the order ``worksheets()``
        yields them.
    """
    spreadsheet = get_worksheet(sheet_id)
    return [tab.title for tab in spreadsheet.worksheets()]