"""Job-posting ingestion API.

Scrapes a job posting URL with Firecrawl, asks a Gemini model to extract
structured details from the scraped content, and appends the combined record
to a Google Sheets worksheet.
"""

import json
import os

import gspread
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from firecrawl import FirecrawlApp
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import BaseModel, Field

load_dotenv()

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
SHEET_ID = os.getenv("SHEET")

# The `cred_dict` environment variable holds the service-account credentials
# as a JSON string. NOTE: if it is unset, os.getenv returns None and
# json.loads raises TypeError at import time.
cred_dic = os.getenv("cred_dict")
cred_dict = json.loads(cred_dic)

# Set up the Google Sheets connection.
gc = gspread.service_account_from_dict(cred_dict)
sh = gc.open_by_key(SHEET_ID)  # SHEET must hold the spreadsheet key, not its title
worksheet = sh.worksheet("S1")  # Replace with your worksheet name if different

# Prompt pieces for the extraction chain. Doubled braces are literal braces in
# the template; {job_description} is the only placeholder that gets filled.
_SYSTEM_PROMPT = (
    "You are an expert job description writer. Your task is to structure the given "
    "web-scraped text into a properly sorted text and extract relevant information from it."
)

_HUMAN_PROMPT = """
You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
Try to return your answer in JSON format based on the following structure:
{{
    "Role": "Title of the job",
    "Company": "Name of the company the job is about",
    "Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
    "Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
    "Type": "Working style (Remote, Hybrid, Onsite)",
    "Location": "Location of the company"
}}

Job Description:
{job_description}
"""


def url_scrape(url):
    """Scrape *url* with Firecrawl, requesting markdown, and return the response.

    A new ``FirecrawlApp`` client is created on every call. Any exception
    raised by the client propagates to the caller unchanged.
    """
    app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
    return app_scraper.scrape_url(url=url, params={"formats": ["markdown"]})


class JDE(BaseModel):
    """Structured-output schema for the details extracted from a job description."""

    Role: str = Field(description="Title of the job")
    Company: str = Field(description="Name of the company")
    Requirements: str = Field(
        description=(
            "Requirements of the job. "
            "Provide a detailed overview of the ideal skills or tech stack required."
        )
    )
    Industry: str = Field(description="Type of Industry the job belongs to")
    Type: str = Field(description="Working style (Remote, Hybrid, Onsite)")
    Location: str = Field(description="Location of the company")


def fastapi_func(
    links: str,
    company: str,
    role: str,
    one_liner: str,
    reward: str,
    locations: str,
    tech_stack: str,
    workplace: str,
    salary: str,
    equity: str,
    yoe: str,
    team_size: str,
    funding: str,
    website: str,
):
    """Scrape a job posting, extract structured details, and log a sheet row.

    The page at ``links`` is scraped and passed to the Gemini model, which is
    configured to return a ``JDE`` instance. One row is appended to the module's
    worksheet: the fourteen arguments in signature order, followed by the
    extracted ``Requirements`` and ``Industry``.

    Returns:
        The structured result produced by the model chain.

    Raises:
        Whatever the scraper, the model chain, or gspread raises; nothing is
        caught here.
    """
    # Scrape the job description from the provided link.
    jd = url_scrape(links)

    query_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", _SYSTEM_PROMPT),
            ("human", _HUMAN_PROMPT),
        ]
    )

    # Initialize the language model and bind it to the JDE schema.
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        api_key=GOOGLE_API_KEY,
        temperature=0.81,
    )
    extraction_chain = query_prompt | llm.with_structured_output(JDE)

    # Invoke the chain to extract the structured job details.
    q = extraction_chain.invoke({"job_description": jd})

    # Caller-supplied fields first, then the two extracted fields at the end.
    row = [
        links,
        company,
        role,
        one_liner,
        reward,
        locations,
        tech_stack,
        workplace,
        salary,
        equity,
        yoe,
        team_size,
        funding,
        website,
        q.Requirements,
        q.Industry,
    ]
    worksheet.append_row(row)
    return q


class JobInput(BaseModel):
    """Request body for ``POST /create-job``; every field is a required string."""

    links: str
    company: str
    role: str
    one_liner: str
    reward: str
    locations: str
    tech_stack: str
    workplace: str
    salary: str
    equity: str
    yoe: str
    team_size: str
    funding: str
    website: str


# Create the FastAPI app instance.
app = FastAPI()


@app.post("/create-job")
def create_job(job: JobInput):
    """Process one job submission and return the extracted details.

    Any exception raised while processing is converted into an HTTP 500
    response whose ``detail`` is the exception message.
    """
    try:
        return fastapi_func(
            links=job.links,
            company=job.company,
            role=job.role,
            one_liner=job.one_liner,
            reward=job.reward,
            locations=job.locations,
            tech_stack=job.tech_stack,
            workplace=job.workplace,
            salary=job.salary,
            equity=job.equity,
            yoe=job.yoe,
            team_size=job.team_size,
            funding=job.funding,
            website=job.website,
        )
    except Exception as e:
        # Top-level request boundary: surface the failure to the client while
        # keeping the original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e