ccm committed on
Commit
8ac0465
·
0 Parent(s):

Initial commit

Browse files
Files changed (7) hide show
  1. .gitattributes +2 -0
  2. .gitignore +152 -0
  3. LICENSE +21 -0
  4. README.md +9 -0
  5. app.py +269 -0
  6. data/Names_2010Census.csv +0 -0
  7. requirements.txt +8 -0
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
.gitignore ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105
+ __pypackages__/
106
+
107
+ # Celery stuff
108
+ celerybeat-schedule
109
+ celerybeat.pid
110
+
111
+ # SageMath parsed files
112
+ *.sage.py
113
+
114
+ # Environments
115
+ .env
116
+ .venv
117
+ env/
118
+ venv/
119
+ ENV/
120
+ env.bak/
121
+ venv.bak/
122
+
123
+ # Spyder project settings
124
+ .spyderproject
125
+ .spyproject
126
+
127
+ # Rope project settings
128
+ .ropeproject
129
+
130
+ # mkdocs documentation
131
+ /site
132
+
133
+ # mypy
134
+ .mypy_cache/
135
+ .dmypy.json
136
+ dmypy.json
137
+
138
+ # Pyre type checker
139
+ .pyre/
140
+
141
+ # pytype static type analyzer
142
+ .pytype/
143
+
144
+ # Cython debug symbols
145
+ cython_debug/
146
+
147
+ # PyCharm
148
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
149
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
150
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
151
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
152
+ #.idea/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Chris McComb
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Cite Diversely
3
+ emoji: 🎓
4
+ colorFrom: orange
5
+ colorTo: gray
6
+ sdk: streamlit
7
+ app_file: app.py
8
+ pinned: false
9
+ ---
app.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is our main interface library
2
+ # For main things
3
+ import types
4
+
5
+ import bibtexparser
6
+ import csv
7
+ import gender_guesser.detector
8
+ import nameparser
9
+ import operator
10
+ import os
11
+ import pandas
12
+ import pathlib
13
+ import pickle
14
+ import plotly.express
15
+ import streamlit
16
+ import st_aggrid
17
+
18
+
19
class References(object):
    """Parse a BibTeX bibliography and tally inferred author gender and ethnicity.

    The constructor produces one (title, first name, last name) record per
    author of every entry that has an ``author`` field.  ``infer_gender`` and
    ``infer_ethnicity`` each add a per-author label list to ``raw_results``
    and increment the matching aggregate counter.
    """

    def __init__(self, reference_text):
        """Load the surname lookup table and extract author names.

        Args:
            reference_text: The bibliography as one BibTeX-formatted string.
        """
        # Labels that infer_gender can record; 'first_name_initial' is assigned
        # by this class itself, the others come from the gender detector.
        self.gender_options = ['male', 'mostly_male', 'andy', 'mostly_female', "female", "unknown",
                               "first_name_initial"]
        self.gender_results = {key: 0 for key in self.gender_options}
        # Columns read from the census CSV, plus the fallback label used when a
        # surname is not in the table.
        self.race_options = ['pctwhite', 'pctblack', 'pctapi', 'pctaian', 'pct2prace', 'pcthispanic', 'race_unknown']
        self.ethnicity_results = {key: 0 for key in self.race_options}

        data_dir = pathlib.Path(__file__).parent / 'data'
        self.ethnicity_lookup = self._load_ethnicity_lookup(
            data_dir / 'ethnicity_lookup.p',
            data_dir / 'Names_2010Census.csv',
            self.race_options[:-1],  # 'race_unknown' is not a CSV column
        )

        # Parse names from input
        self.reference_text = reference_text
        self.references = bibtexparser.loads(reference_text)
        self.first_names = []
        self.last_names = []
        # The three lists stay index-aligned: one element per author.
        self.raw_results = {
            'title': [],
            'first_name': self.first_names,
            'last_name': self.last_names,
        }
        for paper in self.references.entries:
            if "author" not in paper:
                continue
            # An entry without a title must not abort the whole analysis.
            title = paper.get('title', '')
            for person in self._split_authors(paper["author"]):
                name = nameparser.HumanName(person)
                self.raw_results['title'].append(title)
                self.first_names.append(name.first)
                self.last_names.append(name.last)

    @staticmethod
    def _load_ethnicity_lookup(pickle_path, csv_path, race_columns):
        """Return ``{name: {column: float}}`` from the pickle cache or the CSV.

        If ``pickle_path`` exists it is loaded and returned as is.  Otherwise
        the table is built from ``csv_path`` and written to ``pickle_path`` so
        later runs can skip the CSV parse.

        Args:
            pickle_path: Location of the cached lookup table.
            csv_path: CSV file with a ``name`` column and one column per entry
                of ``race_columns``.
            race_columns: Names of the CSV columns to keep.
        """
        if os.path.isfile(pickle_path):
            # NOTE(review): pickle executes code on load; this is acceptable
            # only as long as the cache file is written by this application.
            with open(pickle_path, 'rb') as cache_file:
                return pickle.load(cache_file)

        lookup = {}
        with open(csv_path, newline='') as csv_file:
            for row in csv.DictReader(csv_file):
                shares = {}
                for column in race_columns:
                    try:
                        shares[column] = float(row[column])
                    except ValueError:
                        # Cells that are not numeric count as zero.
                        shares[column] = 0
                lookup[row['name']] = shares

        # The context manager guarantees the cache is flushed and closed.
        with open(pickle_path, 'wb') as cache_file:
            pickle.dump(lookup, cache_file)
        return lookup

    @staticmethod
    def _split_authors(author_field):
        """Split a BibTeX ``author`` value into one string per author."""
        # Collapse every whitespace run first so that authors separated by
        # "and" followed by a line break are split as well.
        return ' '.join(author_field.split()).split(' and ')

    def infer_ethnicity(self):
        """Label each last name with its highest-valued lookup column.

        Stores the labels under ``raw_results['most_likely_race']`` and adds
        them to ``ethnicity_results``.  Names missing from the lookup table
        (keys are matched in upper case) are labelled ``'race_unknown'``.
        """
        most_likely_race = []
        for name in self.last_names:
            key = name.upper()
            if key in self.ethnicity_lookup:
                shares = self.ethnicity_lookup[key]
                most_likely_race.append(max(shares, key=shares.get))
            else:
                most_likely_race.append('race_unknown')
        self.raw_results['most_likely_race'] = most_likely_race

        for label in most_likely_race:
            self.ethnicity_results[label] = self.ethnicity_results.get(label, 0) + 1

    def infer_gender(self):
        """Label each first name using ``gender_guesser``.

        Stores the labels under ``raw_results['most_likely_gender']`` and adds
        them to ``gender_results``.  A single character, or a character
        followed by a period, is labelled ``'first_name_initial'`` without
        consulting the detector.
        """
        most_likely_gender = []
        detector = gender_guesser.detector.Detector()
        for name in self.first_names:
            is_initial = len(name) == 1 or (len(name) == 2 and name[1] == '.')
            if is_initial:
                most_likely_gender.append("first_name_initial")
            else:
                most_likely_gender.append(detector.get_gender(name))
        self.raw_results['most_likely_gender'] = most_likely_gender

        for label in most_likely_gender:
            self.gender_results[label] = self.gender_results.get(label, 0) + 1
92
+
93
+
94
# Display text shown in the table and charts for each gender label.
label_to_gender = dict(
    male="Very Likely Male",
    mostly_male="Likely Male",
    andy="Hard to Tell",
    mostly_female="Likely Female",
    female="Very Likely Female",
    unknown="Unknown (model inconclusive)",
    first_name_initial="Unknown (first name initial only)",
)

# Display text shown in the table and charts for each ethnicity label.
label_to_ethnicity = dict(
    pctwhite='White',
    pctblack='Black',
    pctapi='Asian or Pacific Islander',
    pctaian='American Indian or Alaskan Native',
    pct2prace='Two or more races',
    pcthispanic='Hispanic',
    race_unknown='Unknown (not found in database)',
)

# Inverse mappings: display text back to the internal label.
ethnicity_to_label = dict(zip(label_to_ethnicity.values(), label_to_ethnicity.keys()))
gender_to_label = dict(zip(label_to_gender.values(), label_to_gender.keys()))
112
+
113
+
114
def make_table():
    """Show the per-author results in an editable grid and remember its contents.

    When ``streamlit.session_state`` has no ``'table_data'`` yet, the table is
    built from the bibliography stored under the ``bib`` key; otherwise the
    stored table is displayed again.  Whatever the grid returns is written
    back to ``session_state['table_data']``, and a rerun is requested when the
    grid response carries a truthy ``column_state``.
    """
    if 'table_data' not in streamlit.session_state:
        refs = References(streamlit.session_state.bib)
        refs.infer_gender()
        refs.infer_ethnicity()
        results = refs.raw_results

        # One single-column frame per output column, joined on the row index.
        column_data = [
            ("First Name", results["first_name"]),
            ("Last Name", results["last_name"]),
            ("Most Likely Ethnicity", [label_to_ethnicity[x] for x in results["most_likely_race"]]),
            ("Most Likely Gender", [label_to_gender[x] for x in results["most_likely_gender"]]),
            ("Title", results["title"]),
        ]
        df = None
        for header, values in column_data:
            frame = pandas.DataFrame(values, columns=[header])
            df = frame if df is None else df.join(frame)

        df = df.sort_values(["Last Name", "First Name"]).reset_index(drop=True)
    else:
        df = streamlit.session_state['table_data']

    gb = st_aggrid.GridOptionsBuilder.from_dataframe(df)
    gb.configure_default_column(editable=True)

    # The two inferred columns are edited through a drop-down of display labels.
    select_columns = (
        ('Most Likely Ethnicity', label_to_ethnicity),
        ('Most Likely Gender', label_to_gender),
    )
    for header, choices in select_columns:
        gb.configure_column(
            header,
            cellEditor='agRichSelectCellEditor',
            cellEditorParams={'values': list(choices.values())},
            cellEditorPopup=True,
        )

    # Titles stay read-only.
    gb.configure_column('Title', editable=False)

    response = st_aggrid.AgGrid(
        data=df,
        gridOptions=gb.build(),
        fit_columns_on_grid_load=True,
    )

    streamlit.session_state['table_data'] = response.data
    if response.column_state:
        streamlit.experimental_rerun()
162
+
163
+
164
def make_results():
    """Render donut charts summarising the results table, one chart per tab.

    Reads ``streamlit.session_state['table_data']`` and counts the values of
    its 'Most Likely Ethnicity' and 'Most Likely Gender' columns.  Three tabs
    are created: "Ethnicity", "Gender" and "Accuracy".
    """
    data = streamlit.session_state['table_data']
    ethnicity_counts = data['Most Likely Ethnicity'].value_counts().to_dict()
    gender_counts = data['Most Likely Gender'].value_counts().to_dict()

    def donut(counts):
        # Build one pie chart with a hole and a horizontal legend.
        figure = plotly.express.pie(
            names=list(counts.keys()),
            values=list(counts.values()),
            hole=0.5,
        )
        figure.update_layout(legend=dict(orientation="h"))
        return figure

    plt1 = donut(ethnicity_counts)
    plt2 = donut(gender_counts)
    # The "Accuracy" chart is currently built from the gender counts as well.
    plt3 = donut(gender_counts)

    tab1, tab2, tab3 = streamlit.tabs(["Ethnicity", "Gender", "Accuracy"])

    with tab1:
        streamlit.plotly_chart(plt1, use_container_width=True)
    with tab2:
        streamlit.plotly_chart(plt2, use_container_width=True)
    # Each chart goes into its own tab; the third one belongs to "Accuracy".
    with tab3:
        streamlit.plotly_chart(plt3, use_container_width=True)
199
+
200
+
201
# Page header and introduction.
streamlit.title("Welcome, and thank you")
# The link targets must not carry a trailing quote character inside the
# parentheses, otherwise it becomes part of the URL and the link breaks.
streamlit.markdown("""Simply put, many people often cite people that are like them. This is a problem because academia has historically been white male dominated, leading to the suppression of marginalized voices. If your citations are biased towards people who look like you, then you are missing out on high-quality work.

Its important to note that using this site is not a replacement for truly being diligent and engaged in citing diverse voices. Rather, this site is just a place to start, and hopefully the first step in your journey of citing more diversely. To learn more about your duty to dismantle institutional oppression through your citation practices, read up here:

- [Cite Black Women](https://www.citeblackwomencollective.org)
- [The Racial Politics of Citation](https://www.insidehighered.com/advice/2018/04/27/racial-exclusions-scholarly-citations-opinion)
- [Inclusive Citation: How Diverse Are Your References?](https://blog.mahabali.me/writing/inclusive-citation-how-diverse-are-your-references/)

""")

# Usage instructions shown above the input box.
streamlit.markdown("To use our tool, copy and paste your references in the box below and click on the "
                   "`Analyze` button.")
214
+
215
# Example bibliography used to pre-fill the input box.
example_bib = """@article{Raina2019,
author = {Raina, Ayush and McComb, Christopher and Cagan, Jonathan},
title = {Learning to Design From Humans: Imitating Human Designers Through Deep Learning},
journal = {Journal of Mechanical Design},
volume = {141},
number = {11},
year = {2019},
month = {09},
issn = {1050-0472},
doi = {10.1115/1.4044256}
}

@article{Williams2019,
author = {Williams, Glen and Meisel, Nicholas A. and Simpson, Timothy W. and McComb, Christopher},
title = {Design Repository Effectiveness for 3D Convolutional Neural Networks: Application to Additive Manufacturing},
journal = {Journal of Mechanical Design},
volume = {141},
number = {11},
year = {2019},
month = {09},
issn = {1050-0472},
doi = {10.1115/1.4044199}
}"""
# Text already stored under the "bib" key takes precedence over the example.
filler = streamlit.session_state["bib"] if "bib" in streamlit.session_state else example_bib

# The text area writes its content to session state under the "bib" key.
streamlit.text_area(".bibtex only for now, sorry!", filler, key="bib", height=250)

# Sidebar model pickers; their return values are discarded here.
gender_models = ("gender_guesser", "genderComputer")
ethnicity_models = (
    "ethnicolr - census data",
    "ethnicolr - wikipedia data",
    "ethnicolr - North Carolina data",
    "ethnicolr - Florida registration data",
)
details = streamlit.sidebar
details.selectbox("Gender Inference Model", gender_models)
details.selectbox("Ethnicity Inference Model", ethnicity_models)
248
+
249
+
250
+
251
# Stage 1: the "Analyze" button lives in a placeholder so it can be removed
# once the analysis has been requested.
analyze_slot = streamlit.empty()
analyze_clicked = analyze_slot.button("Analyze")
if analyze_clicked or 'already_analyzed' in streamlit.session_state:
    # Remember the request so the table is shown again on later reruns.
    streamlit.session_state['already_analyzed'] = True
    analyze_slot.empty()
    with streamlit.spinner("Analyzing..."):
        streamlit.markdown("""This table display a tabular version of your results. You can also edit the inferred
ethnicity and gender to improve the accuracy of results.
""")
        make_table()

    # Stage 2: same pattern for the "Plot" button and the charts.
    plot_slot = streamlit.empty()
    plot_clicked = plot_slot.button("Plot")
    if plot_clicked or 'already_plotted' in streamlit.session_state:
        streamlit.session_state['already_plotted'] = True
        plot_slot.empty()
        with streamlit.spinner("Plotting..."):
            streamlit.markdown("These tabs summarize your results with a variety of visualizations and statistics.")
            make_results()
data/Names_2010Census.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ bibtexparser
2
+ gender_guesser
3
+ nameparser
4
+ pandas
5
+ pathlib
6
+ plotly
7
+ streamlit
8
+ streamlit-aggrid