linhkid91 committed on
Commit
284474f
·
1 Parent(s): e3e24ea

Add other fields and fix JSON format errors

Browse files
Files changed (4) hide show
  1. config.yaml +1 -1
  2. src/action.py +5 -1
  3. src/relevancy.py +19 -5
  4. src/relevancy_prompt.txt +1 -1
config.yaml CHANGED
@@ -9,7 +9,7 @@ categories: ["Artificial Intelligence", "Computation and Language", "Machine Lea
9
  # will have their papers filtered out.
10
  #
11
  # Must be within 1-10
12
- threshold: 6
13
 
14
  # A natural language statement that the large language model will use to judge which papers are relevant
15
  #
 
9
  # will have their papers filtered out.
10
  #
11
  # Must be within 1-10
12
+ threshold: 7
13
 
14
  # A natural language statement that the large language model will use to judge which papers are relevant
15
  #
src/action.py CHANGED
@@ -251,7 +251,11 @@ def generate_body(topic, categories, interest, threshold):
251
  )
252
  body = "<br><br>".join(
253
  [
254
- f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}<br>Score: {paper["Relevancy score"]}<br>Reason: {paper["Reasons for match"]}'
 
 
 
 
255
  for paper in relevancy
256
  ]
257
  )
 
251
  )
252
  body = "<br><br>".join(
253
  [
254
+ f'<b>Title:</b> <a href="{paper["main_page"]}">{paper["title"]}</a><br><b>Authors:</b> {paper["authors"]}<br>'
255
+ f'<b>Score:</b> {paper["Relevancy score"]}<br><b>Reason:</b> {paper["Reasons for match"]}<br>'
256
+ f'<b>Goal:</b> {paper["Goal"]}<br><b>Data</b>: {paper["Data"]}<br><b>Methodology:</b> {paper["Methodology"]}<br>'
257
+ f'<b>Experiments & Results</b>: {paper["Experiments & Results"]}<br><b>Git</b>: {paper["Git"]}<br>'
258
+ f'<b>Discussion & Next steps</b>: {paper["Discussion & Next steps"]}'
259
  for paper in relevancy
260
  ]
261
  )
src/relevancy.py CHANGED
@@ -31,12 +31,22 @@ def encode_prompt(query, prompt_papers):
31
  prompt += f"{idx + 1}. Authors: {authors}\n"
32
  prompt += f"{idx + 1}. Abstract: {abstract}\n"
33
  prompt += f"\n Generate response:\n1."
34
- print(prompt)
35
  return prompt
36
 
37
 
 
 
 
 
 
 
 
 
38
  def post_process_chat_gpt_response(paper_data, response, threshold_score=8):
39
  selected_data = []
 
 
40
  if response is None:
41
  return []
42
  json_items = response['message']['content'].replace("\n\n", "\n").split("\n")
@@ -45,12 +55,16 @@ def post_process_chat_gpt_response(paper_data, response, threshold_score=8):
45
  try:
46
  score_items = [
47
  json.loads(re.sub(pattern, "", line))
48
- for line in json_items if "relevancy score" in line.lower()]
49
  except Exception as e:
50
- pprint.pprint([re.sub(pattern, "", line) for line in json_items if "relevancy score" in line.lower()])
51
- print(e) #linh edit, test
 
 
 
 
52
  raise RuntimeError("failed")
53
- pprint.pprint(score_items)
54
  scores = []
55
  for item in score_items:
56
  temp = item["Relevancy score"]
 
31
  prompt += f"{idx + 1}. Authors: {authors}\n"
32
  prompt += f"{idx + 1}. Abstract: {abstract}\n"
33
  prompt += f"\n Generate response:\n1."
34
+ #print(prompt)
35
  return prompt
36
 
37
 
38
+ def is_json(myjson):
39
+ try:
40
+ json.loads(myjson)
41
+ except ValueError as e:
42
+ return False
43
+ return True
44
+
45
+
46
  def post_process_chat_gpt_response(paper_data, response, threshold_score=8):
47
  selected_data = []
48
+ print("HERE")
49
+ print(response['message']['content'])
50
  if response is None:
51
  return []
52
  json_items = response['message']['content'].replace("\n\n", "\n").split("\n")
 
55
  try:
56
  score_items = [
57
  json.loads(re.sub(pattern, "", line))
58
+ for line in json_items if (is_json(line) and "relevancy score" in line.lower())]
59
  except Exception as e:
60
+ #pprint.pprint([re.sub(pattern, "", line) for line in json_items if "relevancy score" in line.lower()])
61
+ try:
62
+ score_items = score_items[:-1]
63
+ except Exception:
64
+ score_items = []
65
+ print(e)
66
  raise RuntimeError("failed")
67
+ #pprint.pprint(score_items)
68
  scores = []
69
  for item in score_items:
70
  temp = item["Relevancy score"]
src/relevancy_prompt.txt CHANGED
@@ -3,6 +3,6 @@ Based on my specific research interests, relevancy score out of 10 for each pape
3
  Additionally, please generate summary, for each paper explaining why it's relevant to my research interests.
4
  Please keep the paper order the same as in the input list, with one json format per line. Example is:
5
 
6
- {"Relevancy score": "an integer score out of 10", "Reasons for match": "1-2 sentence short reasonings", "Goal":"Goal of the paper/What kind of pain points the paper is trying to solve?", "Data": "Short summary of the data source used in the paper", "Methodology": "Summary of methodologies authors described in the paper", "Experiments & Results": "Summary of results", "Git": "Link to the Github code repo (if available)", "Discussion & Next steps": "Further discussion and next steps of the research"}
7
 
8
  My research interests are: NLP, RAGs, LLM, Optmization in Machine learning, Data science, Generative AI, Optimization in LLM, Finance modelling ...
 
3
  Additionally, please generate summary, for each paper explaining why it's relevant to my research interests.
4
  Please keep the paper order the same as in the input list, with one json format per line. Example is:
5
 
6
+ 1. {"Relevancy score": "an integer score out of 10", "Reasons for match": "1-2 sentence short reasonings", "Goal": "What kind of pain points the paper is trying to solve?", "Data": "Summary of the data source used in the paper", "Methodology": "Summary of methodologies used in the paper", "Git": "Link to the code repo (if available)", "Experiments & Results": "Summary of any experiments & its results", "Discussion & Next steps": "Further discussion and next steps of the research"}
7
 
8
  My research interests are: NLP, RAGs, LLM, Optmization in Machine learning, Data science, Generative AI, Optimization in LLM, Finance modelling ...