Spaces:
Sleeping
Sleeping
Merge pull request #13 from AutoLLM/fixes
Browse files- README.md +12 -1
- readme_images/hf_example.png +0 -0
- src/app.py +52 -27
- src/utils.py +1 -0
README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1 |
# ArxivDigest
|
2 |
This repo aims to provide a better daily digest for newly published arXiv papers based on your own research interests and descriptions via relevancy ratings from GPT.
|
3 |
|
|
|
|
|
|
|
|
|
4 |
## π Contents
|
5 |
|
6 |
- [What this repo does](#π-what-this-repo-does)
|
@@ -24,8 +28,15 @@ This repository offers a method to curate a daily digest, sorted by relevance, u
|
|
24 |
* The code pulls all the abstracts for papers in those categories and ranks how relevant they are to your interest on a scale of 1-10 using `gpt-3.5-turbo`.
|
25 |
* The code then emits an HTML digest listing all the relevant papers, and optionally emails it to you using [SendGrid](https://sendgrid.com). You will need to have a SendGrid account with an API key for this functionality to work.
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
-
### Some examples:
|
29 |
|
30 |
#### Digest Configuration:
|
31 |
- Subject/Topic: Computer Science
|
|
|
1 |
# ArxivDigest
|
2 |
This repo aims to provide a better daily digest for newly published arXiv papers based on your own research interests and descriptions via relevancy ratings from GPT.
|
3 |
|
4 |
+
You can try it out at [https://huggingface.co/spaces/AutoLLM/ArxivDigest](https://huggingface.co/spaces/AutoLLM/ArxivDigest) using your own OpenAI API key.
|
5 |
+
|
6 |
+
You can also create a daily subscription pipeline to email you the results.
|
7 |
+
|
8 |
## π Contents
|
9 |
|
10 |
- [What this repo does](#π-what-this-repo-does)
|
|
|
28 |
* The code pulls all the abstracts for papers in those categories and ranks how relevant they are to your interest on a scale of 1-10 using `gpt-3.5-turbo`.
|
29 |
* The code then emits an HTML digest listing all the relevant papers, and optionally emails it to you using [SendGrid](https://sendgrid.com). You will need to have a SendGrid account with an API key for this functionality to work.
|
30 |
|
31 |
+
### Testing it out with Hugging Face:
|
32 |
+
|
33 |
+
We provide a demo at [https://huggingface.co/spaces/AutoLLM/ArxivDigest](https://huggingface.co/spaces/AutoLLM/ArxivDigest). Simply enter your [OpenAI API key](https://platform.openai.com/account/api-keys) and then fill in the configuration on the right. Note that we do not store your key.
|
34 |
+
|
35 |
+

|
36 |
+
|
37 |
+
You can also send yourself an email of the digest by creating a SendGrid account and [API key](https://app.sendgrid.com/settings/api_keys).
|
38 |
|
39 |
+
### Some examples of results:
|
40 |
|
41 |
#### Digest Configuration:
|
42 |
- Subject/Topic: Computer Science
|
readme_images/hf_example.png
ADDED
![]() |
src/app.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1 |
import gradio as gr
|
2 |
from download_new_papers import get_papers
|
|
|
3 |
from relevancy import generate_relevance_score, process_subject_fields
|
4 |
from sendgrid.helpers.mail import Mail, Email, To, Content
|
5 |
import sendgrid
|
6 |
import os
|
|
|
7 |
|
8 |
topics = {
|
9 |
"Physics": "",
|
@@ -57,7 +59,9 @@ categories_map = {
|
|
57 |
|
58 |
|
59 |
def sample(email, topic, physics_topic, categories, interest):
|
60 |
-
if
|
|
|
|
|
61 |
if isinstance(physics_topic, list):
|
62 |
raise gr.Error("You must choose a physics topic.")
|
63 |
topic = physics_topic
|
@@ -72,6 +76,7 @@ def sample(email, topic, physics_topic, categories, interest):
|
|
72 |
else:
|
73 |
papers = get_papers(abbr, limit=4)
|
74 |
if interest:
|
|
|
75 |
relevancy, _ = generate_relevance_score(
|
76 |
papers,
|
77 |
query={"interest": interest},
|
@@ -86,7 +91,6 @@ def change_subsubject(subject, physics_subject):
|
|
86 |
if subject != "Physics":
|
87 |
return gr.Dropdown.update(choices=categories_map[subject], value=[], visible=True)
|
88 |
else:
|
89 |
-
print(physics_subject)
|
90 |
if physics_subject and not isinstance(physics_subject, list):
|
91 |
return gr.Dropdown.update(choices=categories_map[physics_subject], value=[], visible=True)
|
92 |
else:
|
@@ -100,7 +104,9 @@ def change_physics(subject):
|
|
100 |
return gr.Dropdown.update(physics_topics, visible=True)
|
101 |
|
102 |
|
103 |
-
def test(email, topic, physics_topic, categories, interest):
|
|
|
|
|
104 |
if topic == "Physics":
|
105 |
if isinstance(physics_topic, list):
|
106 |
raise gr.Error("You must choose a physics topic.")
|
@@ -116,19 +122,19 @@ def test(email, topic, physics_topic, categories, interest):
|
|
116 |
else:
|
117 |
papers = get_papers(abbr, limit=4)
|
118 |
if interest:
|
|
|
119 |
relevancy, hallucination = generate_relevance_score(
|
120 |
papers,
|
121 |
query={"interest": interest},
|
122 |
threshold_score=7,
|
123 |
num_paper_in_prompt=8)
|
124 |
-
print(relevancy[0].keys())
|
125 |
body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}<br>Score: {paper["Relevancy score"]}<br>Reason: {paper["Reasons for match"]}' for paper in relevancy])
|
126 |
if hallucination:
|
127 |
body = "Warning: the model hallucinated some papers. We have tried to remove them, but the scores may not be accurate.<br><br>" + body
|
128 |
else:
|
129 |
body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}' for paper in papers])
|
130 |
-
sg = sendgrid.SendGridAPIClient(api_key=
|
131 |
-
from_email = Email(
|
132 |
to_email = To(email)
|
133 |
subject = "arXiv digest"
|
134 |
content = Content("text/html", body)
|
@@ -138,33 +144,52 @@ def test(email, topic, physics_topic, categories, interest):
|
|
138 |
# Send an HTTP POST request to /mail/send
|
139 |
response = sg.client.mail.send.post(request_body=mail_json)
|
140 |
if response.status_code >= 200 and response.status_code <= 300:
|
141 |
-
return "
|
142 |
else:
|
143 |
-
return
|
|
|
144 |
|
|
|
|
|
145 |
|
146 |
with gr.Blocks() as demo:
|
147 |
-
with gr.
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
166 |
physics_subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
167 |
subsubject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
168 |
interest.submit(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
169 |
|
170 |
-
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from download_new_papers import get_papers
|
3 |
+
import utils
|
4 |
from relevancy import generate_relevance_score, process_subject_fields
|
5 |
from sendgrid.helpers.mail import Mail, Email, To, Content
|
6 |
import sendgrid
|
7 |
import os
|
8 |
+
import openai
|
9 |
|
10 |
topics = {
|
11 |
"Physics": "",
|
|
|
59 |
|
60 |
|
61 |
def sample(email, topic, physics_topic, categories, interest):
|
62 |
+
if not topic:
|
63 |
+
raise gr.Error("You must choose a topic.")
|
64 |
+
if topic == "Physics":
|
65 |
if isinstance(physics_topic, list):
|
66 |
raise gr.Error("You must choose a physics topic.")
|
67 |
topic = physics_topic
|
|
|
76 |
else:
|
77 |
papers = get_papers(abbr, limit=4)
|
78 |
if interest:
|
79 |
+
if not openai.api_key: raise gr.Error("Set your OpenAI api key on the left first")
|
80 |
relevancy, _ = generate_relevance_score(
|
81 |
papers,
|
82 |
query={"interest": interest},
|
|
|
91 |
if subject != "Physics":
|
92 |
return gr.Dropdown.update(choices=categories_map[subject], value=[], visible=True)
|
93 |
else:
|
|
|
94 |
if physics_subject and not isinstance(physics_subject, list):
|
95 |
return gr.Dropdown.update(choices=categories_map[physics_subject], value=[], visible=True)
|
96 |
else:
|
|
|
104 |
return gr.Dropdown.update(physics_topics, visible=True)
|
105 |
|
106 |
|
107 |
+
def test(email, topic, physics_topic, categories, interest, key):
|
108 |
+
if not email: raise gr.Error("Set your email")
|
109 |
+
if not key: raise gr.Error("Set your SendGrid key")
|
110 |
if topic == "Physics":
|
111 |
if isinstance(physics_topic, list):
|
112 |
raise gr.Error("You must choose a physics topic.")
|
|
|
122 |
else:
|
123 |
papers = get_papers(abbr, limit=4)
|
124 |
if interest:
|
125 |
+
if not openai.api_key: raise gr.Error("Set your OpenAI api key on the left first")
|
126 |
relevancy, hallucination = generate_relevance_score(
|
127 |
papers,
|
128 |
query={"interest": interest},
|
129 |
threshold_score=7,
|
130 |
num_paper_in_prompt=8)
|
|
|
131 |
body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}<br>Score: {paper["Relevancy score"]}<br>Reason: {paper["Reasons for match"]}' for paper in relevancy])
|
132 |
if hallucination:
|
133 |
body = "Warning: the model hallucinated some papers. We have tried to remove them, but the scores may not be accurate.<br><br>" + body
|
134 |
else:
|
135 |
body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}' for paper in papers])
|
136 |
+
sg = sendgrid.SendGridAPIClient(api_key=key)
|
137 |
+
from_email = Email(email)
|
138 |
to_email = To(email)
|
139 |
subject = "arXiv digest"
|
140 |
content = Content("text/html", body)
|
|
|
144 |
# Send an HTTP POST request to /mail/send
|
145 |
response = sg.client.mail.send.post(request_body=mail_json)
|
146 |
if response.status_code >= 200 and response.status_code <= 300:
|
147 |
+
return "Success!"
|
148 |
else:
|
149 |
+
return "Failure: ({response.status_code})"
|
150 |
+
|
151 |
|
152 |
+
def register_openai_token(token):
|
153 |
+
openai.api_key = token
|
154 |
|
155 |
with gr.Blocks() as demo:
|
156 |
+
with gr.Row():
|
157 |
+
with gr.Column(scale=1):
|
158 |
+
token = gr.Textbox(label="OpenAI API Key", type="password")
|
159 |
+
subject = gr.Radio(
|
160 |
+
list(topics.keys()), label="Topic"
|
161 |
+
)
|
162 |
+
physics_subject = gr.Dropdown(physics_topics, value=[], multiselect=False, label="Physics category", visible=False, info="")
|
163 |
+
subsubject = gr.Dropdown(
|
164 |
+
[], value=[], multiselect=True, label="Subtopic", info="Optional. Leaving it empty will use all subtopics.", visible=False)
|
165 |
+
subject.change(fn=change_physics, inputs=[subject], outputs=physics_subject)
|
166 |
+
subject.change(fn=change_subsubject, inputs=[subject, physics_subject], outputs=subsubject)
|
167 |
+
physics_subject.change(fn=change_subsubject, inputs=[subject, physics_subject], outputs=subsubject)
|
168 |
+
|
169 |
+
interest = gr.Textbox(label="A natural language description of what you are interested in. We will generate relevancy scores (1-10) and explanations for the papers in the selected topics according to this statement.", info="Press shift-enter or click the button below to update.", lines=7)
|
170 |
+
sample_btn = gr.Button("Generate Digest")
|
171 |
+
sample_output = gr.Textbox(label="Results for your configuration.", info="For runtime purposes, this is only done on a small subset of recent papers in the topic you have selected. Papers will not be filtered by relevancy, only sorted on a scale of 1-10.")
|
172 |
+
with gr.Column(scale=0.40):
|
173 |
+
with gr.Box():
|
174 |
+
title = gr.Markdown(
|
175 |
+
"""
|
176 |
+
# Email Setup, Optional
|
177 |
+
Send an email to the below address using the configuration on the right. Requires a sendgrid token. These values are not needed to use the right side of this page.
|
178 |
+
|
179 |
+
To create a scheduled job for this, see our [Github Repository](https://github.com/AutoLLM/ArxivDigest)
|
180 |
+
""",
|
181 |
+
interactive=False, show_label=False)
|
182 |
+
email = gr.Textbox(label="Email address", type="email", placeholder="")
|
183 |
+
sendgrid_token = gr.Textbox(label="SendGrid API Key", type="password")
|
184 |
+
with gr.Row():
|
185 |
+
test_btn = gr.Button("Send email")
|
186 |
+
output = gr.Textbox(show_label=False, placeholder="email status")
|
187 |
+
test_btn.click(fn=test, inputs=[email, subject, physics_subject, subsubject, interest, sendgrid_token], outputs=output)
|
188 |
+
token.change(fn=register_openai_token, inputs=[token])
|
189 |
+
sample_btn.click(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
190 |
subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
191 |
physics_subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
192 |
subsubject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
193 |
interest.submit(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
|
194 |
|
195 |
+
demo.launch(show_api=False)
|
src/utils.py
CHANGED
@@ -15,6 +15,7 @@ import copy
|
|
15 |
|
16 |
StrOrOpenAIObject = Union[str, openai_object.OpenAIObject]
|
17 |
|
|
|
18 |
openai_org = os.getenv("OPENAI_ORG")
|
19 |
if openai_org is not None:
|
20 |
openai.organization = openai_org
|
|
|
15 |
|
16 |
StrOrOpenAIObject = Union[str, openai_object.OpenAIObject]
|
17 |
|
18 |
+
|
19 |
openai_org = os.getenv("OPENAI_ORG")
|
20 |
if openai_org is not None:
|
21 |
openai.organization = openai_org
|