Richard Fan committed on
Commit 3dd0355 · 1 Parent(s): 1912e89

use gpt-3.5-turbo-16k and increase batch size

Files changed (2)
  1. src/action.py +1 -1
  2. src/relevancy.py +6 -6
src/action.py CHANGED
@@ -92,7 +92,7 @@ def generate_body(topic, categories, interest, threshold):
         papers,
         query={"interest": interest},
         threshold_score=threshold,
-        num_paper_in_prompt=8)
+        num_paper_in_prompt=16)
     body = "<br><br>".join(
         [f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}<br>Score: {paper["Relevancy score"]}<br>Reason: {paper["Reasons for match"]}'
          for paper in relevancy])
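
The batch-size bump goes hand in hand with the model switch below: gpt-3.5-turbo-16k offers a 16k-token context window, four times that of the base gpt-3.5-turbo, so twice as many paper abstracts fit in a single request. A minimal sketch of what num_paper_in_prompt controls (the real batching loop lives in relevancy.py and is not part of this commit, so the function name here is purely illustrative):

# Illustrative sketch, not repository code: num_paper_in_prompt simply
# determines how many abstracts are packed into each ChatGPT request.
def chunk_papers(papers, num_paper_in_prompt=16):
    """Yield successive batches of at most num_paper_in_prompt papers."""
    for i in range(0, len(papers), num_paper_in_prompt):
        yield papers[i:i + num_paper_in_prompt]

Doubling the batch roughly halves the number of API calls needed for a day's paper list.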
src/relevancy.py CHANGED
@@ -2,7 +2,7 @@
 run:
 python -m relevancy run_all_day_paper \
   --output_dir ./data \
-  --model_name="gpt-3.5-turbo" \
+  --model_name="gpt-3.5-turbo-16k" \
 """
 import time
 import json
@@ -53,7 +53,7 @@ def post_process_chat_gpt_response(paper_data, response, threshold_score=8):
     scores = []
     for item in score_items:
         temp = item["Relevancy score"]
-        if "/" in temp:
+        if isinstance(temp, str) and "/" in temp:
             scores.append(int(temp.split("/")[0]))
         else:
             scores.append(int(temp))
@@ -72,7 +72,7 @@ def post_process_chat_gpt_response(paper_data, response, threshold_score=8):
         output_str += "Link: " + paper_data[idx]["main_page"] + "\n"
         for key, value in inst.items():
             paper_data[idx][key] = value
-            output_str += key + ": " + value + "\n"
+            output_str += str(key) + ": " + str(value) + "\n"
         paper_data[idx]['summarized_text'] = output_str
         selected_data.append(paper_data[idx])
     return selected_data, hallucination
@@ -90,7 +90,7 @@ def process_subject_fields(subjects):
 def generate_relevance_score(
     all_papers,
     query,
-    model_name="gpt-3.5-turbo",
+    model_name="gpt-3.5-turbo-16k",
     threshold_score=8,
     num_paper_in_prompt=4,
     temperature=0.4,
@@ -132,7 +132,7 @@ def generate_relevance_score(
     print(f"Post-processing took {time.time() - process_start:.2f}s")

     if sorting:
-        ans_data = sorted(ans_data, key=lambda x: x["Relevancy score"], reverse=True)
+        ans_data = sorted(ans_data, key=lambda x: int(x["Relevancy score"]), reverse=True)

     return ans_data, hallucination

@@ -140,7 +140,7 @@ def run_all_day_paper(
     query={"interest":"", "subjects":["Computation and Language", "Artificial Intelligence"]},
     date=None,
     data_dir="../data",
-    model_name="gpt-3.5-turbo",
+    model_name="gpt-3.5-turbo-16k",
     threshold_score=8,
     num_paper_in_prompt=8,
     temperature=0.4,
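
Beyond the model rename, the relevancy.py changes harden the handling of model output that arrives in inconsistent formats: a relevancy score may come back as a string such as "7/10", a bare numeric string, or an integer, and other response fields may be non-string JSON values. A small standalone illustration of the hardened score parsing (the same logic as in post_process_chat_gpt_response, rewritten in isolation; parse_score is not a function in the repository):

# Standalone illustration, not repository code.
def parse_score(raw):
    """Accept "7/10", "7", or 7 and return the integer score."""
    if isinstance(raw, str) and "/" in raw:
        return int(raw.split("/")[0])
    return int(raw)

assert parse_score("7/10") == 7
assert parse_score("9") == 9
assert parse_score(8) == 8

The int(...) cast added to the sort key in generate_relevance_score guards against the same mismatch: with a mix of int and str scores, sorted() would otherwise compare unlike types and raise a TypeError, and the str(key)/str(value) casts likewise prevent a TypeError when concatenating non-string values into output_str.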