Kabilash10 committed on
Commit
e2faac0
·
verified ·
1 Parent(s): c9890fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -25
app.py CHANGED
@@ -83,8 +83,7 @@ def get_docparser_data(file, api_key, parser_id) -> Optional[dict]:
83
  import base64
84
  auth_string = base64.b64encode(f"{api_key}:".encode()).decode()
85
  headers = {
86
- 'Authorization': f'Basic {auth_string}',
87
- 'Content-Type': 'multipart/form-data'
88
  }
89
 
90
  # Prepare the file for upload
@@ -95,55 +94,60 @@ def get_docparser_data(file, api_key, parser_id) -> Optional[dict]:
95
  # Upload document
96
  upload_response = requests.post(
97
  upload_url,
98
- headers={'Authorization': f'Basic {auth_string}'},
99
  files=files
100
  )
101
  upload_response.raise_for_status()
102
 
103
  # Get document ID from upload response
104
  upload_data = upload_response.json()
105
- if not isinstance(upload_data, list) or len(upload_data) == 0:
106
- st.error(f"Invalid response from Docparser upload: {upload_data}")
107
- return None
108
-
109
- document_id = upload_data[0].get('id')
110
  if not document_id:
111
  st.error("Failed to get document ID from upload response")
112
  return None
113
 
114
  # Wait a moment for processing
115
  import time
116
- time.sleep(2) # Give Docparser time to process the document
117
 
118
  # Get parsed results
119
  results_url = f"https://api.docparser.com/v1/results/{parser_id}/{document_id}"
120
  results_response = requests.get(
121
  results_url,
122
- headers={'Authorization': f'Basic {auth_string}'}
123
  )
124
  results_response.raise_for_status()
125
 
126
  # Handle results
127
  results_data = results_response.json()
128
 
129
- # Debug information
130
- st.write("Debug - API Response:", results_data)
131
-
132
  if isinstance(results_data, list) and len(results_data) > 0:
133
- # Extract the relevant fields based on your Docparser parser configuration
 
134
  parsed_data = {
135
- 'name': results_data[0].get('full_name', 'Unknown'),
136
- 'email': results_data[0].get('email', 'Unknown'),
137
- 'phone': results_data[0].get('phone', 'Unknown'),
138
- 'skills': [skill.strip() for skill in results_data[0].get('skills', '').split(',') if skill.strip()],
139
- 'certifications': results_data[0].get('certifications', []),
140
- 'experience_years': float(results_data[0].get('experience_years', 0)),
141
- 'degree': results_data[0].get('degree', 'Not specified'),
142
- 'institution': results_data[0].get('institution', 'Not specified'),
143
- 'year': results_data[0].get('graduation_year', 'Not specified'),
144
- 'summary': results_data[0].get('summary', 'No summary available'),
145
- 'projects': results_data[0].get('projects', [])
146
  }
 
 
 
 
 
 
 
 
 
147
  return parsed_data
148
  else:
149
  st.error(f"No parsed data received from Docparser: {results_data}")
@@ -158,6 +162,8 @@ def get_docparser_data(file, api_key, parser_id) -> Optional[dict]:
158
  st.error("Raw response content: " + str(upload_response.content if 'upload_response' in locals() else 'No response'))
159
  except Exception as e:
160
  st.error(f"Error fetching data from Docparser: {e}")
 
 
161
  return None
162
 
163
  def get_openai_data(file, openai_key: str) -> Optional[dict]:
 
83
  import base64
84
  auth_string = base64.b64encode(f"{api_key}:".encode()).decode()
85
  headers = {
86
+ 'Authorization': f'Basic {auth_string}'
 
87
  }
88
 
89
  # Prepare the file for upload
 
94
  # Upload document
95
  upload_response = requests.post(
96
  upload_url,
97
+ headers=headers,
98
  files=files
99
  )
100
  upload_response.raise_for_status()
101
 
102
  # Get document ID from upload response
103
  upload_data = upload_response.json()
104
+
105
+ # Extract document ID from the correct response format
106
+ document_id = upload_data.get('id')
 
 
107
  if not document_id:
108
  st.error("Failed to get document ID from upload response")
109
  return None
110
 
111
  # Wait a moment for processing
112
  import time
113
+ time.sleep(3) # Increased wait time to ensure document is processed
114
 
115
  # Get parsed results
116
  results_url = f"https://api.docparser.com/v1/results/{parser_id}/{document_id}"
117
  results_response = requests.get(
118
  results_url,
119
+ headers=headers
120
  )
121
  results_response.raise_for_status()
122
 
123
  # Handle results
124
  results_data = results_response.json()
125
 
 
 
 
126
  if isinstance(results_data, list) and len(results_data) > 0:
127
+ # Map the fields according to your Docparser parser configuration
128
+ result = results_data[0] # Get the first result
129
  parsed_data = {
130
+ 'name': result.get('name', result.get('full_name', 'Unknown')),
131
+ 'email': result.get('email', 'Unknown'),
132
+ 'phone': result.get('phone', result.get('phone_number', 'Unknown')),
133
+ 'skills': result.get('skills', []),
134
+ 'certifications': result.get('certifications', []),
135
+ 'experience_years': float(result.get('experience_years', 0)),
136
+ 'degree': result.get('degree', result.get('education_degree', 'Not specified')),
137
+ 'institution': result.get('institution', result.get('university', 'Not specified')),
138
+ 'year': result.get('year', result.get('graduation_year', 'Not specified')),
139
+ 'summary': result.get('summary', result.get('profile_summary', 'No summary available')),
140
+ 'projects': result.get('projects', [])
141
  }
142
+
143
+ # Convert skills from string to list if needed
144
+ if isinstance(parsed_data['skills'], str):
145
+ parsed_data['skills'] = [skill.strip() for skill in parsed_data['skills'].split(',')]
146
+
147
+ # Convert certifications from string to list if needed
148
+ if isinstance(parsed_data['certifications'], str):
149
+ parsed_data['certifications'] = [cert.strip() for cert in parsed_data['certifications'].split(',')]
150
+
151
  return parsed_data
152
  else:
153
  st.error(f"No parsed data received from Docparser: {results_data}")
 
162
  st.error("Raw response content: " + str(upload_response.content if 'upload_response' in locals() else 'No response'))
163
  except Exception as e:
164
  st.error(f"Error fetching data from Docparser: {e}")
165
+ st.error(f"Upload data: {upload_data if 'upload_data' in locals() else 'No upload data'}")
166
+ st.error(f"Results data: {results_data if 'results_data' in locals() else 'No results data'}")
167
  return None
168
 
169
  def get_openai_data(file, openai_key: str) -> Optional[dict]: