Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import gradio as gr
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
import pickle
|
6 |
+
import sklearn
|
7 |
+
from datasets import load_dataset
|
8 |
+
|
9 |
+
# Training data from the notebook. It is re-read here only to rebuild the
# label -> integer-code tables for the nominal columns.
data = pd.read_csv("Dataset/mldata.csv")

# Load the prediction model exported from the notebook.
# NOTE(review): unpickling runs arbitrary code; only ship an 'rfweights.pkl'
# that was produced by this project.
with open('rfweights.pkl', 'rb') as pickleFile:
    rfmodel = pickle.load(pickleFile)

# Keep an untouched copy of the nominal columns: `data` is overwritten with
# integer codes below, and the original labels are needed to pair with them.
categorical_cols = data[[
    'certifications',
    'workshops',
    'Interested subjects',
    'interested career area ',
    'Type of company want to settle in?',
    'Interested Type of Books',
]].copy()

# Replace every nominal column in `data` with the codes pandas assigns to it.
for col in categorical_cols:
    data[col] = data[col].astype('category').cat.codes


def _label_code_table(column):
    """Return ``(labels, codes, {label: code})`` for one nominal column.

    Both lists come from ``unique()``, which keeps order of appearance, so the
    label at position k in the untouched copy lines up with the code at
    position k in the encoded frame.
    """
    labels = list(categorical_cols[column].unique())
    codes = list(data[column].unique())
    return labels, codes, dict(zip(labels, codes))


# Label/code tables for certificates.
certificates_name, certificates_code, certificates_references = (
    _label_code_table('certifications')
)

# Label/code tables for workshops.
workshop_name, workshop_code, workshop_references = (
    _label_code_table('workshops')
)

# Label/code tables for subject interests.
(
    subjects_interest_name,
    subjects_interest_code,
    subjects_interest_references,
) = _label_code_table('Interested subjects')

# Label/code tables for career interests (the column name really ends with a space).
career_interest_name, career_interest_code, career_interest_references = (
    _label_code_table('interested career area ')
)

# Label/code tables for the intended company type.
company_intends_name, company_intends_code, company_intends_references = (
    _label_code_table('Type of company want to settle in?')
)

# Label/code tables for book interests.
book_interest_name, book_interest_code, book_interest_references = (
    _label_code_table('Interested Type of Books')
)
|
60 |
+
|
61 |
+
|
62 |
+
def greet(name):
    """Return the greeting ``Hello, <name>!`` for the given name."""
    greeting = "Hello, {}!".format(name)
    return greeting
|
64 |
+
|
65 |
+
# Disabled draft of a dummy-encoding helper, kept for reference only.
# NOTE(review): the draft compares the builtin `input` instead of its `df`
# parameter, so it must be fixed before it is ever re-enabled.
#
# #dummy encode
# def dummy_encode(df):
#     if input == "Management":
#         return [1, 0]
#     elif input == "Technical":
#         return [0, 1]
#     elif input == "smart worker":
#         return [1, 0]
#     elif input == "hard worker":
#         return [0, 1]
#     else:
#         return "Invalid choice"
|
77 |
+
|
78 |
+
# Output labels, one per column of rfmodel.predict_proba.
# NOTE(review): presumably this mirrors the order of rfmodel.classes_ —
# confirm against the training notebook.
_CAREER_LABELS = (
    "Applications Developer",
    "CRM Technical Developer",
    "Database Developer",
    "Mobile Applications Developer",
    "Network Security Engineer",
    "Software Developer",
    "Software Engineer",
    "Software Quality Assurance (QA)/ Testing",
    "Systems Security Administrator",
    "Technical Support",
    "UX Designer",
    "Web Developer",
)

# Two-column dummy encodings appended after the leading features.
_MANAGEMENT_TECHNICAL_DUMMIES = {"Management": (1, 0), "Technical": (0, 1)}
_SMART_HARD_DUMMIES = {"smart worker": (1, 0), "hard worker": (0, 1)}


def _skill_level(rating):
    """Map a rating string to 0 ('poor'), 1 ('medium') or 2 (anything else)."""
    if "poor" in rating:
        return 0
    if "medium" in rating:
        return 1
    return 2


def rfprediction(name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
                 self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill,
                 memory_capability, subject_interest, career_interest, company_intend,
                 senior_elder_advise, book_interest, introvert_extro, team_player,
                 management_technical, smart_hardworker):
    """Score one questionnaire answer with the random-forest model.

    The answers are turned into a single feature row: 17 leading values in
    questionnaire order, then the management/technical dummy pair, then the
    smart/hard worker dummy pair.

    Args:
        name: Respondent name; not used for the prediction.
        logical_thinking, hackathon_attend, coding_skills,
        public_speaking_skills: Numeric ratings, passed through unchanged.
        self_learning, extra_course, senior_elder_advise, introvert_extro,
        team_player: Numeric yes/no answers, passed through unchanged.
        certificate_code, worskhop_code, subject_interest, career_interest,
        company_intend, book_interest: Category labels, translated with the
            module-level ``*_references`` tables.
        read_writing_skill, memory_capability: Rating strings; see
            ``_skill_level``.
        management_technical: ``"Management"`` or ``"Technical"``.
        smart_hardworker: ``"smart worker"`` or ``"hard worker"``.

    Returns:
        A dict mapping each career label to its predicted probability, or the
        string ``"Err"`` when an answer is missing or not a known choice.
    """
    # Answers that were never selected arrive as None; reject them up front
    # instead of failing later inside the encoding or the model.
    passthrough = (
        logical_thinking, hackathon_attend, coding_skills,
        public_speaking_skills, self_learning, extra_course,
        senior_elder_advise, introvert_extro, team_player,
    )
    if any(value is None for value in passthrough):
        return "Err"
    if not isinstance(read_writing_skill, str) or not isinstance(memory_capability, str):
        return "Err"

    management_dummies = _MANAGEMENT_TECHNICAL_DUMMIES.get(management_technical)
    worker_dummies = _SMART_HARD_DUMMIES.get(smart_hardworker)
    if management_dummies is None or worker_dummies is None:
        return "Err"

    # Translate category labels into the integer codes built at module level.
    try:
        certificate = certificates_references[certificate_code]
        workshop = workshop_references[worskhop_code]
        subject = subjects_interest_references[subject_interest]
        career = career_interest_references[career_interest]
        company = company_intends_references[company_intend]
        book = book_interest_references[book_interest]
    except KeyError:
        return "Err"

    # Build the row positionally; the dummy columns go last, in the same
    # order the original list-append logic produced.
    userdata_list = [[
        logical_thinking,
        hackathon_attend,
        coding_skills,
        public_speaking_skills,
        self_learning,
        extra_course,
        certificate,
        workshop,
        _skill_level(read_writing_skill),
        _skill_level(memory_capability),
        subject,
        career,
        company,
        senior_elder_advise,
        book,
        introvert_extro,
        team_player,
        *management_dummies,
        *worker_dummies,
    ]]

    prediction_result_all = rfmodel.predict_proba(userdata_list)
    print(prediction_result_all)

    # Pair each label with the probability in the matching column.
    return {
        label: float(probability)
        for label, probability in zip(_CAREER_LABELS, prediction_result_all[0])
    }
|
160 |
+
|
161 |
+
# Choices offered by the certificate dropdown.
cert_list = [
    "app development",
    "distro making",
    "full stack",
    "hadoop",
    "information security",
    "machine learning",
    "python",
    "r programming",
    "shell programming",
]

# Choices offered by the workshop dropdown.
workshop_list = [
    "cloud computing",
    "data science",
    "database security",
    "game development",
    "hacking",
    "system designing",
    "testing",
    "web technologies",
]

# Rating scale shared by the reading/writing and memory dropdowns.
skill = ["excellent", "medium", "poor"]

# Choices offered by the subject-interest dropdown.
subject_list = [
    "cloud computing",
    "Computer Architecture",
    "data engineering",
    "hacking",
    "IOT",
    "Management",
    "networks",
    "parallel computing",
    "programming",
    "Software Engineering",
]

# Choices offered by the career-interest dropdown.
career_list = [
    "Business process analyst",
    "cloud computing",
    "developer",
    "security",
    "system developer",
    "testing",
]

# Choices offered by the company-type dropdown.
company_list = [
    "BPA",
    "Cloud Services",
    "Finance",
    "Product based",
    "product development",
    "SAaS services",
    "Sales and Marketing",
    "Service Based",
    "Testing and Maintainance Services",
    "Web Services",
]

# Choices offered by the book-genre dropdown.
book_list = [
    "Action and Adventure",
    "Anthology",
    "Art",
    "Autobiographies",
    "Biographies",
    "Childrens",
    "Comics",
    "Cookbooks",
    "Diaries",
    "Dictionaries",
    "Drama",
    "Encyclopedias",
    "Fantasy",
    "Guide",
    "Health",
    "History",
    "Horror",
    "Journals",
    "Math",
    "Mystery",
    "Poetry",
    "Prayer books",
    "Religion-Spirituality",
    "Romance",
    "Satire",
    "Science",
    "Science fiction",
    "Self help",
    "Series",
    "Travel",
    "Trilogy",
]

# Choices for the management-vs-technical dropdown.
Choice_list = ["Management", "Technical"]

# Choices for the smart-vs-hard worker dropdown.
worker_list = ["hard worker", "smart worker"]
|
170 |
+
|
171 |
+
# Yes/No questions reach the model as the index of the selected choice, so the
# choices must be an ordered list. A set has no stable order for strings, which
# made the index of "Yes" vary from one process start to the next.
# NOTE(review): this order yields No -> 0 and Yes -> 1; confirm it matches the
# encoding used when the model was trained.
_YES_NO_CHOICES = ["No", "Yes"]

# Questionnaire form; the inputs are listed in the exact order of the
# rfprediction parameters.
demo = gr.Interface(
    fn=rfprediction,
    inputs=[
        gr.Textbox(placeholder="What is your name?", label="Name"),
        gr.Slider(minimum=1, maximum=9, value=3, step=1, label="Are you a logical thinking person?", info="Scale: 1 - 9"),
        gr.Slider(minimum=0, maximum=6, value=0, step=1, label="Do you attend any Hackathons?", info="Scale: 0 - 6 | 0 - if not attended any"),
        gr.Slider(minimum=1, maximum=9, value=5, step=1, label="How do you rate your coding skills?", info="Scale: 1 - 9"),
        gr.Slider(minimum=1, maximum=9, value=3, step=1, label="How do you rate your public speaking skills/confidency?", info="Scale: 1 - 9"),
        gr.Radio(_YES_NO_CHOICES, type="index", label="Are you a self-learning person? *"),
        gr.Radio(_YES_NO_CHOICES, type="index", label="Do you take extra courses in uni (other than IT)? *"),
        gr.Dropdown(cert_list, label="Select a certificate you took!"),
        gr.Dropdown(workshop_list, label="Select a workshop you attended!"),
        gr.Dropdown(skill, label="Select your read and writing skill"),
        gr.Dropdown(skill, label="Is your memory capability good?"),
        gr.Dropdown(subject_list, label="What subject you are interested in?"),
        gr.Dropdown(career_list, label="Which IT-Career do you have interests in?"),
        gr.Dropdown(company_list, label="Do you have any interested company that you intend to settle in?"),
        gr.Radio(_YES_NO_CHOICES, type="index", label="Do you ever seek any advices from senior or elders? *"),
        gr.Dropdown(book_list, label="Select your interested genre of book!"),
        gr.Radio(_YES_NO_CHOICES, type="index", label="Are you an Introvert?| No - extrovert *"),
        gr.Radio(_YES_NO_CHOICES, type="index", label="Ever worked in a team? *"),
        gr.Dropdown(Choice_list, label="Which area do you prefer: Management or Technical?"),
        gr.Dropdown(worker_list, label="Are you a Smart worker or Hard worker?"),
    ],
    # Show the five most probable careers.
    outputs=gr.Label(num_top_classes=5),
    title=" ",
    # The original line ended in an unterminated string literal; only the
    # readable part of the description is kept.
    description="Members: ",
)


#main
if __name__ == "__main__":
    demo.launch(share=True)
|
202 |
+
|
203 |
+
|
204 |
+
|
205 |
+
|