[email protected] committed on
Commit
7be836b
·
1 Parent(s): e1bba7d

add scholarly green flags

Browse files
Files changed (2) hide show
  1. scholarly_flags.json +0 -0
  2. source_eval_model.py +7 -13
scholarly_flags.json CHANGED
The diff for this file is too large to render. See raw diff
 
source_eval_model.py CHANGED
@@ -223,36 +223,30 @@ def eval_scholarly_sources(citation):
223
  the citation dictionary, which has format {"external_link": str, "type": str, "html_tag": HTMLElement, "publisher": str | None, "date": datetime |None}
224
  Output:
225
  the tag for citation (red, green, yellow, unknown)
226
- """
227
-
228
  # read the dictionaries of flags from the json file
229
  with open("scholarly_flags.json", "r") as f:
230
  all_flags = json.load(f)
231
 
232
- found_tag = False
233
  # Check on the domain of external link
234
  if citation["external_link"]:
235
  domain = tldextract.extract(citation["external_link"]).domain
236
  if domain in all_flags["red_scholarly_reverse"]:
237
- found_tag = True
238
- # print("The citation is", citation, "which matches the red list item", red_flags_reverse[domain])
239
  return "red"
240
  elif domain in all_flags["yellow_scholarly_reverse"]:
241
- # print("The citation is ", citation, "which matches the yellow list item ", yellow_flags_reverse[domain])
242
  return "yellow"
 
 
 
243
  #check on the name dictionary
244
- if not found_tag:
245
  if citation["publisher"] in all_flags["red_scholarly"]:
246
- found_tag = True
247
- # print("The citation is ", citation, "which matches the red list item", citation["publisher"])
248
  return "red"
249
  elif citation["publisher"] in all_flags["yellow_scholarly"]:
250
- found_tag = True
251
- # print("The citation is ", citation, "which matches the yellow list item", citation["publisher"])
252
  return "yellow"
 
 
253
 
254
- if not found_tag:
255
- return "unknown"
256
 
257
 
258
  def eval_non_scholarly_sources(citation, citation_val):
 
223
  the citation dictionary, which has format {"external_link": str, "type": str, "html_tag": HTMLElement, "publisher": str | None, "date": datetime |None}
224
  Output:
225
  the tag for citation (red, green, yellow, unknown)
226
+ """
 
227
  # read the dictionaries of flags from the json file
228
  with open("scholarly_flags.json", "r") as f:
229
  all_flags = json.load(f)
230
 
 
231
  # Check on the domain of external link
232
  if citation["external_link"]:
233
  domain = tldextract.extract(citation["external_link"]).domain
234
  if domain in all_flags["red_scholarly_reverse"]:
 
 
235
  return "red"
236
  elif domain in all_flags["yellow_scholarly_reverse"]:
 
237
  return "yellow"
238
+ elif domain in all_flags["green_scholarly_reverse"]:
239
+ return "green"
240
+
241
  #check on the name dictionary
 
242
  if citation["publisher"] in all_flags["red_scholarly"]:
 
 
243
  return "red"
244
  elif citation["publisher"] in all_flags["yellow_scholarly"]:
 
 
245
  return "yellow"
246
+ elif citation["publisher"] in all_flags["green_scholarly"]:
247
+ return "green"
248
 
249
+ return "unknown"
 
250
 
251
 
252
  def eval_non_scholarly_sources(citation, citation_val):