ccm committed on
Commit
9120b3c
·
1 Parent(s): 569cbb9

More model options

Browse files
Files changed (1) hide show
  1. app.py +7 -27
app.py CHANGED
@@ -3,12 +3,9 @@
3
  import types
4
 
5
  import bibtexparser
6
- import csv
7
  import gender_guesser.detector
8
  import nameparser
9
- import operator
10
  import pandas
11
- import pathlib
12
  import plotly.express
13
  import streamlit
14
  import st_aggrid
@@ -24,20 +21,6 @@ class References(object):
24
  self.ethnicity_results = {key: 0 for key in self.race_options}
25
  self.raw_results = pandas.DataFrame(columns=["First Name", "Last Name", "Title"])
26
 
27
- csv_path = pathlib.Path(__file__).parent / 'data' / 'Names_2010Census.csv'
28
-
29
- self.ethnicity_lookup = {}
30
- with open(csv_path) as csv_file:
31
- reader = csv.DictReader(csv_file)
32
- for row in reader:
33
- self.ethnicity_lookup[row['name']] = {}
34
- for race in self.race_options[:-1]:
35
- try:
36
- value = float(row[race])
37
- except ValueError:
38
- value = 0
39
- self.ethnicity_lookup[row['name']][race] = value
40
-
41
  # Parse names from input
42
  self.reference_text = reference_text
43
  self.references = bibtexparser.loads(reference_text)
@@ -50,16 +33,8 @@ class References(object):
50
 
51
  def infer_ethnicity(self):
52
  self.raw_results = ethnicolr.pred_census_ln(self.raw_results, 'Last Name', 2010)
53
- # Get ethnicity
54
- most_likely_race = []
55
- for name in self.raw_results['Last Name']:
56
- if name.upper() in self.ethnicity_lookup:
57
- rr = max(self.ethnicity_lookup[name.upper()].items(), key=operator.itemgetter(1))[0]
58
- most_likely_race.append(rr)
59
- else:
60
- most_likely_race.append('race_unknown')
61
  self.raw_results['Most Likely Ethnicity'] = self.raw_results['race']
62
- # self.raw_results.drop(labels=['race', 'pctwhite', 'pctblack', 'pctapi', 'pctaian', 'pct2prace', 'pcthispanic'])
63
 
64
  for i in self.raw_results['Most Likely Ethnicity']:
65
  self.ethnicity_results[i] = self.ethnicity_results.get(i, 0) + 1
@@ -98,7 +73,12 @@ label_to_gender = {'male': "Very Likely Male",
98
  "unknown": "Unknown (model inconclusive)",
99
  "first_name_initial": "Unknown (first name initial only)"}
100
 
101
- label_to_ethnicity = {'pctwhite': 'White',
 
 
 
 
 
102
  'pctblack': 'Black',
103
  'pctapi': 'Asian or Pacific Islander',
104
  'pctaian': 'American Indian or Alaskan Native',
 
3
  import types
4
 
5
  import bibtexparser
 
6
  import gender_guesser.detector
7
  import nameparser
 
8
  import pandas
 
9
  import plotly.express
10
  import streamlit
11
  import st_aggrid
 
21
  self.ethnicity_results = {key: 0 for key in self.race_options}
22
  self.raw_results = pandas.DataFrame(columns=["First Name", "Last Name", "Title"])
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # Parse names from input
25
  self.reference_text = reference_text
26
  self.references = bibtexparser.loads(reference_text)
 
33
 
34
  def infer_ethnicity(self):
35
  self.raw_results = ethnicolr.pred_census_ln(self.raw_results, 'Last Name', 2010)
 
 
 
 
 
 
 
 
36
  self.raw_results['Most Likely Ethnicity'] = self.raw_results['race']
37
+ self.raw_results.drop(labels=['race', 'white', 'black', 'hispanic', 'api'])
38
 
39
  for i in self.raw_results['Most Likely Ethnicity']:
40
  self.ethnicity_results[i] = self.ethnicity_results.get(i, 0) + 1
 
73
  "unknown": "Unknown (model inconclusive)",
74
  "first_name_initial": "Unknown (first name initial only)"}
75
 
76
+ label_to_ethnicity = {
77
+ 'white': 'White',
78
+ 'black': 'Black',
79
+ 'api': 'Asian or Pacific Islander',
80
+ 'hispanic': 'Hispanic',
81
+ 'pctwhite': 'White',
82
  'pctblack': 'Black',
83
  'pctapi': 'Asian or Pacific Islander',
84
  'pctaian': 'American Indian or Alaskan Native',