ak0601 committed on
Commit
763d20d
·
verified ·
1 Parent(s): 888d419

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +16 -0
  2. app.py +164 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
FROM python:3.9

# Create an unprivileged account (UID 1000) and switch to it so the
# remaining build steps and the running container do not execute as root.
RUN useradd -m -u 1000 user
USER user

# Make executables under the user's ~/.local/bin (presumably where pip puts
# console scripts such as uvicorn for a non-root install) resolvable by name.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies from requirements.txt before copying the rest of the
# sources, so this step only depends on the requirements file.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the application sources, owned by the unprivileged user.
COPY --chown=user . /app

# Serve the `app` object from app.py on all interfaces, port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel, Field
3
+ from langchain_google_genai import ChatGoogleGenerativeAI
4
+ from langchain_core.prompts import ChatPromptTemplate
5
+ import json
6
+ from firecrawl import FirecrawlApp
7
+ import gspread
8
+ import os
9
+ from dotenv import load_dotenv
10
+ import json
11
+
12
load_dotenv()

# API keys and the target spreadsheet ID are read from the environment
# (load_dotenv() above also picks them up from a local .env file).
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
SHEET_ID = os.getenv("SHEET_ID")

# Service-account credentials, preferred form: one JSON document stored in
# the `genai` environment variable. `dic1` stays None when that variable is
# missing or malformed so the fallback below can take over.
dic = os.getenv("genai")
dic1 = None
if dic:
    try:
        dic1 = json.loads(dic)
        # Deliberately not printed: the parsed value contains the private key.
    except json.JSONDecodeError:
        print("Error: 'genai' environment variable is not valid JSON")
else:
    print("Warning: 'genai' environment variable is not set")

# Service-account credentials, fallback form: one environment variable per
# field of the service-account JSON.
genai = {
    "type": os.getenv("type"),
    "project_id": os.getenv("project_id"),
    "private_key_id": os.getenv("private_key_id"),
    "private_key": os.getenv("private_key"),
    "client_email": os.getenv("client_email"),
    "client_id": os.getenv("client_id"),
    "auth_uri": os.getenv("auth_uri"),
    "token_uri": os.getenv("token_uri"),
    "auth_provider_x509_cert_url": os.getenv("auth_provider_x509_cert_url"),
    "client_x509_cert_url": os.getenv("client_x509_cert_url"),
    "universe_domain": os.getenv("universe_domain"),
}

# Set up the Google Sheets connection. Use the JSON blob when it parsed,
# otherwise the per-field dict, instead of failing with a NameError.
_credentials = dic1 if dic1 is not None else genai
gc = gspread.service_account_from_dict(_credentials)
sh = gc.open_by_key(SHEET_ID)  # SHEET_ID is the spreadsheet key, not its title
worksheet = sh.worksheet("S1")  # Replace with your worksheet name if different
43
+
44
+ # Define your URL scraping function
45
def url_scrape(url):
    """Scrape ``url`` with Firecrawl and return the scrape response.

    A new ``FirecrawlApp`` client is created on each call using the
    module-level ``FIRECRAWL_API_KEY``; the page is requested in the
    ``markdown`` format.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``FirecrawlApp.scrape_url`` returns, unmodified.

    Raises:
        Any exception raised by the Firecrawl client propagates to the
        caller; nothing is caught here.
    """
    app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
    return app_scraper.scrape_url(url=url, params={'formats': ['markdown']})
52
+
53
+ # Define the structured output model for job description extraction
54
class JDE(BaseModel):
    # Schema for the job details extracted from a scraped posting; it is the
    # model handed to `with_structured_output` in `fastapi_func`.
    # NOTE: documented with comments rather than a class docstring so the
    # generated schema stays exactly as before.

    # What the job is and who offers it.
    Role: str = Field(
        description="Title of the job",
    )
    Company: str = Field(
        description="Name of the company",
    )

    # What the job asks for and which sector it belongs to.
    Requirements: str = Field(
        description="Requirements of the job. Provide a detailed overview of the ideal skills or tech stack required.",
    )
    Industry: str = Field(
        description="Type of Industry the job belongs to",
    )

    # How and where the work is done.
    Type: str = Field(
        description="Working style (Remote, Hybrid, Onsite)",
    )
    Location: str = Field(
        description="Location of the company",
    )
61
+
62
+ # The core function that processes the job input and appends data to Google Sheets
63
def fastapi_func(links, company, role, one_liner, reward, locations, tech_stack, workplace, salary, equity, yoe, team_size, funding, website):
    """Scrape a job posting, extract structured details, and log a sheet row.

    The page at ``links`` is scraped with ``url_scrape``, the scrape result is
    given to a Gemini chat model configured to answer with a ``JDE`` object,
    and one row is appended to the module-level ``worksheet``.

    Args:
        links: URL of the job posting; the only argument that is scraped.
        company, role, one_liner, reward, locations, tech_stack, workplace,
        salary, equity, yoe, team_size, funding, website: Caller-supplied
            values that are written to the sheet unchanged.

    Returns:
        The ``JDE`` object produced by the language model.

    Raises:
        ValueError: If the model chain yields no structured result.
        Exception: Errors from scraping, the model call, or the sheet append
            propagate unchanged.
    """
    # Scrape the job description from the provided link
    jd = url_scrape(links)

    # Create the prompt for the language model
    system = (
        "You are an expert job description writer. Your task is to structure the given web-scraped text into a properly sorted text and extract relevant information from it."
    )
    # Doubled braces are literal braces in the template; {job_description}
    # is the only placeholder that gets filled in.
    query_prompt = ChatPromptTemplate.from_messages([
        ("system", system),
        ("human", """
You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
Try to return your answer in JSON format based on the following structure:
{{
"Role": "Title of the job",
"Company": "Name of the company the job is about",
"Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
"Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
"Type": "Working style (Remote, Hybrid, Onsite)",
"Location": "Location of the company"
}}
Job Description: {job_description}
"""),
    ])

    # Initialize the language model and set it up for structured output using the JDE model
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.81)
    str_llm = llm.with_structured_output(JDE)
    JDE_re = query_prompt | str_llm

    # Invoke the language model to extract structured job details
    q = JDE_re.invoke({"job_description": jd})
    if q is None:
        # NOTE(review): the structured-output chain can apparently come back
        # empty when the model produces no parsable answer; fail with a clear
        # message instead of an AttributeError on the attribute reads below.
        raise ValueError("Language model returned no structured job details")

    # Caller-supplied columns first, then the two model-extracted fields.
    row = [
        links, company, role, one_liner, reward, locations,
        tech_stack, workplace, salary, equity, yoe, team_size,
        funding, website, q.Requirements, q.Industry,
    ]
    worksheet.append_row(row)

    return q
122
+
123
+ # Define a Pydantic model for the API input
124
class JobInput(BaseModel):
    # Request body accepted by the /create-job endpoint. Every field is a
    # required string and is passed by the same name to `fastapi_func`.

    # URL of the job posting; this is the value that gets scraped.
    links: str

    # Descriptive columns about the position.
    company: str
    role: str
    one_liner: str
    reward: str
    locations: str
    tech_stack: str
    workplace: str

    # Compensation and experience columns.
    salary: str
    equity: str
    yoe: str

    # Company profile columns.
    team_size: str
    funding: str
    website: str
139
+
140
+ # Create the FastAPI app instance
141
app = FastAPI()


# POST /create-job: forwards the validated request body to `fastapi_func`
# and returns its result; any exception becomes an HTTP 500 whose detail is
# the exception text.
# (Plain comments are used here instead of a docstring so the endpoint's
# generated API description is left as it was.)
@app.post("/create-job")
def create_job(job: JobInput):
    # Names of the request fields, each forwarded as the keyword argument of
    # the same name.
    forwarded_fields = (
        "links", "company", "role", "one_liner", "reward", "locations",
        "tech_stack", "workplace", "salary", "equity", "yoe", "team_size",
        "funding", "website",
    )
    try:
        job_kwargs = {name: getattr(job, name) for name in forwarded_fields}
        return fastapi_func(**job_kwargs)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pydantic
4
+ langchain-google-genai
5
+ langchain-core
6
+ firecrawl
7
+ gspread
8
+ python-dotenv