Spaces:
Sleeping
Sleeping
Linh Nguyen
committed on
Commit
·
94aa2bb
1
Parent(s):
0eadab7
edit prompts
Browse files- config.yaml +5 -4
- src/relevancy.py +4 -2
- src/relevancy_prompt.txt +5 -4
config.yaml
CHANGED
@@ -3,13 +3,13 @@ topic: "Computer Science"
|
|
3 |
# An empty list here will include all categories in a topic
|
4 |
# Use the natural language names of the topics, found here: https://arxiv.org
|
5 |
# Including more categories will result in more calls to the large language model
|
6 |
-
categories: ["Artificial Intelligence", "Computation and Language"]
|
7 |
|
8 |
# Relevance score threshold. abstracts that receive a score less than this from the large language model
|
9 |
# will have their papers filtered out.
|
10 |
#
|
11 |
# Must be within 1-10
|
12 |
-
threshold:
|
13 |
|
14 |
# A natural language statement that the large language model will use to judge which papers are relevant
|
15 |
#
|
@@ -23,5 +23,6 @@ threshold: 7
|
|
23 |
interest: |
|
24 |
1. Large language model pretraining and finetunings
|
25 |
2. Multimodal machine learning
|
26 |
-
3.
|
27 |
-
4.
|
|
|
|
3 |
# An empty list here will include all categories in a topic
|
4 |
# Use the natural language names of the topics, found here: https://arxiv.org
|
5 |
# Including more categories will result in more calls to the large language model
|
6 |
+
categories: ["Artificial Intelligence", "Computation and Language", "Machine Learning"]
|
7 |
|
8 |
# Relevance score threshold. abstracts that receive a score less than this from the large language model
|
9 |
# will have their papers filtered out.
|
10 |
#
|
11 |
# Must be within 1-10
|
12 |
+
threshold: 6
|
13 |
|
14 |
# A natural language statement that the large language model will use to judge which papers are relevant
|
15 |
#
|
|
|
23 |
interest: |
|
24 |
1. Large language model pretraining and finetunings
|
25 |
2. Multimodal machine learning
|
26 |
+
3. RAGs
|
27 |
+
4. Optimization of LLM and GenAI
|
28 |
+
5. Do not care about specific applications, for example, information extraction, summarization, etc.
|
src/relevancy.py
CHANGED
@@ -46,8 +46,10 @@ def post_process_chat_gpt_response(paper_data, response, threshold_score=8):
|
|
46 |
score_items = [
|
47 |
json.loads(re.sub(pattern, "", line))
|
48 |
for line in json_items if "relevancy score" in line.lower()]
|
49 |
-
except Exception:
|
50 |
pprint.pprint([re.sub(pattern, "", line) for line in json_items if "relevancy score" in line.lower()])
|
|
|
|
|
51 |
raise RuntimeError("failed")
|
52 |
pprint.pprint(score_items)
|
53 |
scores = []
|
@@ -136,7 +138,7 @@ def generate_relevance_score(
|
|
136 |
return ans_data, hallucination
|
137 |
|
138 |
def run_all_day_paper(
|
139 |
-
query={"interest":"", "subjects":["Computation and Language", "Artificial Intelligence"]},
|
140 |
date=None,
|
141 |
data_dir="../data",
|
142 |
model_name="gpt-3.5-turbo-16k",
|
|
|
46 |
score_items = [
|
47 |
json.loads(re.sub(pattern, "", line))
|
48 |
for line in json_items if "relevancy score" in line.lower()]
|
49 |
+
except Exception as e:
|
50 |
pprint.pprint([re.sub(pattern, "", line) for line in json_items if "relevancy score" in line.lower()])
|
51 |
+
print(e)
|
52 |
+
#raise e
|
53 |
raise RuntimeError("failed")
|
54 |
pprint.pprint(score_items)
|
55 |
scores = []
|
|
|
138 |
return ans_data, hallucination
|
139 |
|
140 |
def run_all_day_paper(
|
141 |
+
query={"interest":"Computer Science", "subjects":["Machine Learning", "Computation and Language", "Artificial Intelligence"]},
|
142 |
date=None,
|
143 |
data_dir="../data",
|
144 |
model_name="gpt-3.5-turbo-16k",
|
src/relevancy_prompt.txt
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
You have been asked to read a list of a few arxiv papers, each with title, authors and abstract.
|
2 |
-
Based on my specific research interests,
|
3 |
-
Additionally, please generate
|
4 |
Please keep the paper order the same as in the input list, with one json format per line. Example is:
|
5 |
-
1. {"Relevancy score": "an integer score out of 10", "Reasons for match": "1-2 sentence short reasonings"}
|
6 |
|
7 |
-
|
|
|
|
|
|
1 |
You have been asked to read a list of a few arxiv papers, each with title, authors and abstract.
|
2 |
+
Based on my specific research interests, please provide a relevancy score out of 10 for each paper, with a higher score indicating greater relevance. A relevancy score of more than 7 means the paper will need a person's attention for details.
|
3 |
+
Additionally, please generate a summary for each paper explaining why it's relevant to my research interests.
|
4 |
Please keep the paper order the same as in the input list, with one json format per line. Example is:
|
|
|
5 |
|
6 |
+
{"Relevancy score": "an integer score out of 10", "Reasons for match": "1-2 sentence short reasonings", "Goal":"Goal of the paper/What kind of pain points the paper is trying to solve?", "Data": "Short summary of the data source used in the paper", "Methodology": "Summary of methodologies authors described in the paper", "Experiments & Results": "Summary of results", "Git": "Link to the Github code repo (if available)", "Discussion & Next steps": "Further discussion and next steps of the research"}
|
7 |
+
|
8 |
+
My research interests are: NLP, RAGs, LLM, Optimization in Machine learning, Data science, Generative AI, Optimization in LLM, Finance modelling ...
|