Spaces:
Sleeping
Sleeping
File size: 6,120 Bytes
4f30460 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import streamlit as st
import pandas as pd
import time
from typing import List, Dict
from serpapi import GoogleSearch
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
import gspread
from google.oauth2.service_account import Credentials
import pandas as pd
import os
credentials = {
"type": os.getenv("type"),
"project_id": os.getenv("project_id"),
"private_key_id": os.getenv("private_key_id"),
"private_key": os.getenv("private_key"),
"client_email": os.getenv("client_email"),
"client_id": os.getenv("client_id"),
"client_x509_cert_url": os.getenv("client_x509_cert_url"),
"universe_domain": os.getenv("universe_domain"),
"auth_uri": os.getenv("auth_uri"),
"token_uri": os.getenv("token_uri"),
"auth_provider_x509_cert_url": os.getenv("auth_provider_x509_cert_url")
}
def get_sheet_client():
"""Helper function to create authenticated Google Sheets client"""
try:
scope = ["https://www.googleapis.com/auth/spreadsheets"]
creds = Credentials.from_service_account_info(credentials)
client = gspread.authorize(creds)
# Get service account email for error messages
service_account_email = creds.service_account_email
st.session_state['service_account_email'] = service_account_email
return client
except FileNotFoundError:
raise ValueError(
"credentials.json file not found. Please ensure it exists in the project directory."
)
except Exception as e:
raise ValueError(f"Error setting up Google Sheets client: {str(e)}")
def get_worksheet(sheet_id: str, range_name: str = None):
"""Helper function to get worksheet with improved error handling"""
try:
client = get_sheet_client()
sheet = client.open_by_key(sheet_id)
return sheet.worksheet(range_name) if range_name else sheet
except gspread.exceptions.SpreadsheetNotFound:
service_email = st.session_state.get('service_account_email', 'the service account')
raise ValueError(
f"Spreadsheet not found. Please verify:\n"
f"1. The spreadsheet ID is correct\n"
f"2. The sheet is shared with {service_email}\n"
f"3. Sharing permissions allow edit access"
)
except gspread.exceptions.WorksheetNotFound:
raise ValueError(f"Worksheet '{range_name}' not found in the spreadsheet")
except gspread.exceptions.APIError as e:
if 'PERMISSION_DENIED' in str(e):
service_email = st.session_state.get('service_account_email', 'the service account')
raise ValueError(
f"Permission denied. Please share the spreadsheet with {service_email} "
f"and ensure it has edit access."
)
raise ValueError(f"Google Sheets API error: {str(e)}")
def process_queries(df: pd.DataFrame, primary_column: str, query_template: str) -> List[Dict]:
results = []
serpapi_key = os.getenv("SERPAPI_API_KEY")
for index, row in df.iterrows():
try:
value = row[primary_column]
query = query_template.replace(f"{{{primary_column}}}", str(value))
# Perform search
search = GoogleSearch({
"q": query,
"gl": "in",
"api_key": serpapi_key,
"num": 5
})
search_results = search.get_dict()
# Store results
results.append({
primary_column: value,
"query": query,
"search_results": search_results.get("organic_results", [])
})
# Rate limiting
time.sleep(1)
if index % 10 == 0:
st.write(f"Processed {index + 1} queries...")
except Exception as e:
st.warning(f"Error processing query for {value}: {str(e)}")
continue
return results
def setup_llm():
"""Setup LangChain with Groq"""
api_key=os.getenv("GROQ_API_KEY")
llm = ChatGroq(
api_key=api_key,
model="llama-3.1-8b-instant",
temperature=0,
max_tokens=None,
timeout=None,
max_retries=2,
)
return llm
def process_with_ai(search_results: dict, query: str, llm) -> str:
template = """
Extract ONLY the specific information requested from the search results for: {query}
Search Results:
{search_results}
Provide ONLY the extracted information as a simple text response.
If multiple items exist, separate them with semicolons.
If no relevant information is found, respond with "Not found".
For example:
- If asked for locations: "Bengaluru; Mumbai; Delhi"
- If asked for email: "[email protected]"
- If asked for address: "123 Main Street, City, Country"
"""
prompt = PromptTemplate(
input_variables=["query", "search_results"],
template=template
)
chain = prompt | llm
response = chain.invoke({"query": query, "search_results": search_results})
return response
def load_google_sheet(sheet_id: str, range_name: str) -> pd.DataFrame:
worksheet = get_worksheet(sheet_id,range_name)
data = worksheet.get_all_records()
return pd.DataFrame(data)
def write_to_google_sheet(sheet_id: str, range_name: str, results_df: pd.DataFrame):
worksheet = get_worksheet(sheet_id, range_name)
all_values = worksheet.get_all_values()
num_rows = len(all_values)
next_col_num = len(all_values[0]) + 1
next_col_letter = chr(64 + next_col_num)
range = f'{next_col_letter}1:{next_col_letter}{num_rows}'
values = [['AI Results']] + [[str(result)] for result in results_df['result']]
worksheet.update(values, f'{range}')
def get_all_sheet_names(sheet_id: str) -> List[str]:
worksheet = get_worksheet(sheet_id)
sheets = map(lambda x: x.title, worksheet.worksheets())
return list(sheets)
|