Spaces:

ak0601
/

Paraform_data

Paused

App Files Files Community

ak0601 committed on Mar 27

Commit

763d20d

verified ·

1 Parent(s): 888d419

Upload 3 files

Browse files

Files changed (3) hide show

Dockerfile +16 -0
app.py +164 -0
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+# Create and switch to a non-root user (UID 1000); everything below runs as that user.
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's ~/.local/bin on PATH — presumably so scripts pip installs for
+# this user (e.g. uvicorn) are found by CMD.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# Copy and install requirements before the rest of the source so the dependency
+# layer is only rebuilt when requirements.txt changes.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve the `app` object from app.py on all interfaces, port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,164 @@

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.prompts import ChatPromptTemplate
+import json
+from firecrawl import FirecrawlApp
+import gspread
+import os
+from dotenv import load_dotenv
+import json
+load_dotenv()
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
+SHEET_ID = os.getenv("SHEET_ID")
+dic = os.getenv("genai")
+if dic:
+    try:
+        dic1 = json.loads(dic)
+        print(dic1)
+    except json.JSONDecodeError:
+        print("Error: 'genai' environment variable is not valid JSON")
+else:
+    print("Warning: 'genai' environment variable is not set")
+# Setup Google Sheets connection (update the path and sheet name)
+genai ={
+  "type": os.getenv("type"),
+  "project_id": os.getenv("project_id"),
+  "private_key_id": os.getenv("private_key_id"),
+  "private_key": os.getenv("private_key"),
+  "client_email": os.getenv("client_email"),
+  "client_id": os.getenv("client_id"),
+  "auth_uri": os.getenv("auth_uri"),
+  "token_uri": os.getenv("token_uri"),
+  "auth_provider_x509_cert_url": os.getenv("auth_provider_x509_cert_url"),
+  "client_x509_cert_url": os.getenv("client_x509_cert_url"),
+  "universe_domain": os.getenv("universe_domain")
+}
+gc = gspread.service_account_from_dict(dic1)
+sh = gc.open_by_key(SHEET_ID)  # Replace with your Google Sheet name
+worksheet = sh.worksheet("S1")   # Replace with your worksheet name if different
+# Define your URL scraping function
+def url_scrape(url):
+    app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
+    response = app_scraper.scrape_url(url=url, params={'formats': ['markdown']})
+    try:
+        return response
+    except Exception:
+        return response
+# Define the structured output model for job description extraction
+# This class is handed to `llm.with_structured_output(JDE)` in `fastapi_func`,
+# which returns an instance of it. Only `Requirements` and `Industry` are
+# written to the worksheet there; the whole object is returned to the caller.
+# NOTE(review): comments are used instead of a class docstring on purpose —
+# a docstring would presumably become part of the schema sent to the model.
+class JDE(BaseModel):
+    # Job title.
+    Role: str = Field(description="Title of the job")
+    # Hiring company.
+    Company: str = Field(description="Name of the company")
+    # Free-text skills / tech-stack overview.
+    Requirements: str = Field(description="Requirements of the job. Provide a detailed overview of the ideal skills or tech stack required.")
+    # Industry classification of the job.
+    Industry: str = Field(description="Type of Industry the job belongs to")
+    # Working arrangement.
+    Type: str = Field(description="Working style (Remote, Hybrid, Onsite)")
+    # Company location.
+    Location: str = Field(description="Location of the company")
+# The core function that processes the job input and appends data to Google Sheets
+def fastapi_func(links, company, role, one_liner, reward, locations, tech_stack, workplace, salary, equity, yoe, team_size, funding, website):
+    # Scrape the job description from the provided link
+    jd = url_scrape(links)
+    # Create the prompt for the language model
+    system = (
+        "You are an expert job description writer. Your task is to structure the given web-scraped text into a properly sorted text and extract relevant information from it."
+    )
+    prompt_text = """
+You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
+Try to return your answer in JSON format based on the following structure:
+{{
+    "Role": "Title of the job",
+    "Company": "Name of the company the job is about",
+    "Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
+    "Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
+    "Type": "Working style (Remote, Hybrid, Onsite)",
+    "Location": "Location of the company"
+}}
+Job Description: {jd}
+    """
+    query_prompt = ChatPromptTemplate.from_messages([
+        ("system", system),
+        ("human", """
+You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
+Try to return your answer in JSON format based on the following structure:
+{{
+    "Role": "Title of the job",
+    "Company": "Name of the company the job is about",
+    "Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
+    "Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
+    "Type": "Working style (Remote, Hybrid, Onsite)",
+    "Location": "Location of the company"
+}}
+Job Description: {job_description}
+    """)
+    ])
+    # Initialize the language model and set it up for structured output using the JDE model
+    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.81)
+    str_llm = llm.with_structured_output(JDE)
+    JDE_re = query_prompt | str_llm
+    # Invoke the language model to extract structured job details
+    q = JDE_re.invoke({"job_description": jd})
+    # Extract additional fields
+    req = q.Requirements
+    indus = q.Industry
+    # Prepare the row with all the data (append the two extra fields at the end)
+    row = [
+        links, company, role, one_liner, reward, locations,
+        tech_stack, workplace, salary, equity, yoe, team_size,
+        funding, website, req, indus
+    ]
+    worksheet.append_row(row)
+    return q
+# Define a Pydantic model for the API input
+# Request body for POST /create-job. Every field is a string with no default.
+# `create_job` forwards each field to `fastapi_func` under the same name, and
+# `fastapi_func` writes them to the worksheet in this order.
+class JobInput(BaseModel):
+    links: str  # URL of the job posting; this is what gets scraped
+    company: str
+    role: str
+    one_liner: str
+    reward: str
+    locations: str
+    tech_stack: str
+    workplace: str
+    salary: str
+    equity: str
+    yoe: str
+    team_size: str
+    funding: str
+    website: str
+# Create the FastAPI app instance
+app = FastAPI()
+@app.post("/create-job")
+def create_job(job: JobInput):
+    try:
+        result = fastapi_func(
+            links=job.links,
+            company=job.company,
+            role=job.role,
+            one_liner=job.one_liner,
+            reward=job.reward,
+            locations=job.locations,
+            tech_stack=job.tech_stack,
+            workplace=job.workplace,
+            salary=job.salary,
+            equity=job.equity,
+            yoe=job.yoe,
+            team_size=job.team_size,
+            funding=job.funding,
+            website=job.website
+        )
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+# Python packages installed by the Dockerfile via `pip install -r requirements.txt` (no versions pinned).
+fastapi
+uvicorn
+pydantic
+langchain-google-genai
+langchain-core
+firecrawl
+gspread
+python-dotenv