richardmfan committed on
Commit
ed81dc9
·
unverified ·
2 Parent(s): 3d747b5 eb44c5f

Merge pull request #13 from AutoLLM/fixes

Browse files
Files changed (4) hide show
  1. README.md +12 -1
  2. readme_images/hf_example.png +0 -0
  3. src/app.py +52 -27
  4. src/utils.py +1 -0
README.md CHANGED
@@ -1,6 +1,10 @@
1
  # ArxivDigest
2
  This repo aims to provide a better daily digest for newly published arXiv papers based on your own research interests and descriptions via relevancy ratings from GPT.
3
 
 
 
 
 
4
  ## 📚 Contents
5
 
6
  - [What this repo does](#🔍-what-this-repo-does)
@@ -24,8 +28,15 @@ This repository offers a method to curate a daily digest, sorted by relevance, u
24
  * The code pulls all the abstracts for papers in those categories and ranks how relevant they are to your interest on a scale of 1-10 using `gpt-3.5-turbo`.
25
  * The code then emits an HTML digest listing all the relevant papers, and optionally emails it to you using [SendGrid](https://sendgrid.com). You will need to have a SendGrid account with an API key for this functionality to work.
26
 
 
 
 
 
 
 
 
27
 
28
- ### Some examples:
29
 
30
  #### Digest Configuration:
31
  - Subject/Topic: Computer Science
 
1
  # ArxivDigest
2
  This repo aims to provide a better daily digest for newly published arXiv papers based on your own research interests and descriptions via relevancy ratings from GPT.
3
 
4
+ You can try it out at [https://huggingface.co/spaces/AutoLLM/ArxivDigest](https://huggingface.co/spaces/AutoLLM/ArxivDigest) using your own OpenAI api key.
5
+
6
+ You can also create a daily subscription pipeline to email you the results.
7
+
8
  ## 📚 Contents
9
 
10
  - [What this repo does](#🔍-what-this-repo-does)
 
28
  * The code pulls all the abstracts for papers in those categories and ranks how relevant they are to your interest on a scale of 1-10 using `gpt-3.5-turbo`.
29
  * The code then emits an HTML digest listing all the relevant papers, and optionally emails it to you using [SendGrid](https://sendgrid.com). You will need to have a SendGrid account with an API key for this functionality to work.
30
 
31
+ ### Testing it out with Hugging Face:
32
+
33
+ We provide a demo at [https://huggingface.co/spaces/AutoLLM/ArxivDigest](https://huggingface.co/spaces/AutoLLM/ArxivDigest). Simply enter your [OpenAI API key](https://platform.openai.com/account/api-keys) and then fill in the configuration on the right. Note that we do not store your key.
34
+
35
+ ![hfexample](./readme_images/hf_example.png)
36
+
37
+ You can also send yourself an email of the digest by creating a SendGrid account and [api key](https://app.SendGrid.com/settings/api_keys).
38
 
39
+ ### Some examples of results:
40
 
41
  #### Digest Configuration:
42
  - Subject/Topic: Computer Science
readme_images/hf_example.png ADDED
src/app.py CHANGED
@@ -1,9 +1,11 @@
1
  import gradio as gr
2
  from download_new_papers import get_papers
 
3
  from relevancy import generate_relevance_score, process_subject_fields
4
  from sendgrid.helpers.mail import Mail, Email, To, Content
5
  import sendgrid
6
  import os
 
7
 
8
  topics = {
9
  "Physics": "",
@@ -57,7 +59,9 @@ categories_map = {
57
 
58
 
59
  def sample(email, topic, physics_topic, categories, interest):
60
- if subject == "Physics":
 
 
61
  if isinstance(physics_topic, list):
62
  raise gr.Error("You must choose a physics topic.")
63
  topic = physics_topic
@@ -72,6 +76,7 @@ def sample(email, topic, physics_topic, categories, interest):
72
  else:
73
  papers = get_papers(abbr, limit=4)
74
  if interest:
 
75
  relevancy, _ = generate_relevance_score(
76
  papers,
77
  query={"interest": interest},
@@ -86,7 +91,6 @@ def change_subsubject(subject, physics_subject):
86
  if subject != "Physics":
87
  return gr.Dropdown.update(choices=categories_map[subject], value=[], visible=True)
88
  else:
89
- print(physics_subject)
90
  if physics_subject and not isinstance(physics_subject, list):
91
  return gr.Dropdown.update(choices=categories_map[physics_subject], value=[], visible=True)
92
  else:
@@ -100,7 +104,9 @@ def change_physics(subject):
100
  return gr.Dropdown.update(physics_topics, visible=True)
101
 
102
 
103
- def test(email, topic, physics_topic, categories, interest):
 
 
104
  if topic == "Physics":
105
  if isinstance(physics_topic, list):
106
  raise gr.Error("You must choose a physics topic.")
@@ -116,19 +122,19 @@ def test(email, topic, physics_topic, categories, interest):
116
  else:
117
  papers = get_papers(abbr, limit=4)
118
  if interest:
 
119
  relevancy, hallucination = generate_relevance_score(
120
  papers,
121
  query={"interest": interest},
122
  threshold_score=7,
123
  num_paper_in_prompt=8)
124
- print(relevancy[0].keys())
125
  body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}<br>Score: {paper["Relevancy score"]}<br>Reason: {paper["Reasons for match"]}' for paper in relevancy])
126
  if hallucination:
127
  body = "Warning: the model hallucinated some papers. We have tried to remove them, but the scores may not be accurate.<br><br>" + body
128
  else:
129
  body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}' for paper in papers])
130
- sg = sendgrid.SendGridAPIClient(api_key=os.environ.get('SENDGRID_API_KEY'))
131
- from_email = Email("") # CHANGE TO YOUR VERIFIED SENDER
132
  to_email = To(email)
133
  subject = "arXiv digest"
134
  content = Content("text/html", body)
@@ -138,33 +144,52 @@ def test(email, topic, physics_topic, categories, interest):
138
  # Send an HTTP POST request to /mail/send
139
  response = sg.client.mail.send.post(request_body=mail_json)
140
  if response.status_code >= 200 and response.status_code <= 300:
141
- return "Send test email: Success!"
142
  else:
143
- return f"Send test email: Failure ({response.status_code})"
 
144
 
 
 
145
 
146
  with gr.Blocks() as demo:
147
- with gr.Column():
148
- email = gr.Textbox(label="Email address")
149
- subject = gr.Radio(
150
- list(topics.keys()), label="Topic to subscribe to"
151
- )
152
- physics_subject = gr.Dropdown(physics_topics, value=[], multiselect=False, label="Physics category", visible=False, info="")
153
- subsubject = gr.Dropdown(
154
- [], value=[], multiselect=True, label="Subtopic", info="", visible=False)
155
- subject.change(fn=change_physics, inputs=[subject], outputs=physics_subject)
156
- subject.change(fn=change_subsubject, inputs=[subject, physics_subject], outputs=subsubject)
157
- physics_subject.change(fn=change_subsubject, inputs=[subject, physics_subject], outputs=subsubject)
158
-
159
-
160
- interest = gr.Textbox(label="A natural language description of what you are interested in. Press enter to update.")
161
- sample_output = gr.Textbox(label="Examples")
162
- test_btn = gr.Button("Send email")
163
- output = gr.Textbox(label="Test email status")
164
- test_btn.click(fn=test, inputs=[email, subject, physics_subject, subsubject, interest], outputs=output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
166
  physics_subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
167
  subsubject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
168
  interest.submit(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
169
 
170
- demo.launch()
 
1
  import gradio as gr
2
  from download_new_papers import get_papers
3
+ import utils
4
  from relevancy import generate_relevance_score, process_subject_fields
5
  from sendgrid.helpers.mail import Mail, Email, To, Content
6
  import sendgrid
7
  import os
8
+ import openai
9
 
10
  topics = {
11
  "Physics": "",
 
59
 
60
 
61
  def sample(email, topic, physics_topic, categories, interest):
62
+ if not topic:
63
+ raise gr.Error("You must choose a topic.")
64
+ if topic == "Physics":
65
  if isinstance(physics_topic, list):
66
  raise gr.Error("You must choose a physics topic.")
67
  topic = physics_topic
 
76
  else:
77
  papers = get_papers(abbr, limit=4)
78
  if interest:
79
+ if not openai.api_key: raise gr.Error("Set your OpenAI api key on the left first")
80
  relevancy, _ = generate_relevance_score(
81
  papers,
82
  query={"interest": interest},
 
91
  if subject != "Physics":
92
  return gr.Dropdown.update(choices=categories_map[subject], value=[], visible=True)
93
  else:
 
94
  if physics_subject and not isinstance(physics_subject, list):
95
  return gr.Dropdown.update(choices=categories_map[physics_subject], value=[], visible=True)
96
  else:
 
104
  return gr.Dropdown.update(physics_topics, visible=True)
105
 
106
 
107
+ def test(email, topic, physics_topic, categories, interest, key):
108
+ if not email: raise gr.Error("Set your email")
109
+ if not key: raise gr.Error("Set your SendGrid key")
110
  if topic == "Physics":
111
  if isinstance(physics_topic, list):
112
  raise gr.Error("You must choose a physics topic.")
 
122
  else:
123
  papers = get_papers(abbr, limit=4)
124
  if interest:
125
+ if not openai.api_key: raise gr.Error("Set your OpenAI api key on the left first")
126
  relevancy, hallucination = generate_relevance_score(
127
  papers,
128
  query={"interest": interest},
129
  threshold_score=7,
130
  num_paper_in_prompt=8)
 
131
  body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}<br>Score: {paper["Relevancy score"]}<br>Reason: {paper["Reasons for match"]}' for paper in relevancy])
132
  if hallucination:
133
  body = "Warning: the model hallucinated some papers. We have tried to remove them, but the scores may not be accurate.<br><br>" + body
134
  else:
135
  body = "<br><br>".join([f'Title: <a href="{paper["main_page"]}">{paper["title"]}</a><br>Authors: {paper["authors"]}' for paper in papers])
136
+ sg = sendgrid.SendGridAPIClient(api_key=key)
137
+ from_email = Email(email)
138
  to_email = To(email)
139
  subject = "arXiv digest"
140
  content = Content("text/html", body)
 
144
  # Send an HTTP POST request to /mail/send
145
  response = sg.client.mail.send.post(request_body=mail_json)
146
  if response.status_code >= 200 and response.status_code <= 300:
147
+ return "Success!"
148
  else:
149
+ return "Failure: ({response.status_code})"
150
+
151
 
152
+ def register_openai_token(token):
153
+ openai.api_key = token
154
 
155
  with gr.Blocks() as demo:
156
+ with gr.Row():
157
+ with gr.Column(scale=1):
158
+ token = gr.Textbox(label="OpenAI API Key", type="password")
159
+ subject = gr.Radio(
160
+ list(topics.keys()), label="Topic"
161
+ )
162
+ physics_subject = gr.Dropdown(physics_topics, value=[], multiselect=False, label="Physics category", visible=False, info="")
163
+ subsubject = gr.Dropdown(
164
+ [], value=[], multiselect=True, label="Subtopic", info="Optional. Leaving it empty will use all subtopics.", visible=False)
165
+ subject.change(fn=change_physics, inputs=[subject], outputs=physics_subject)
166
+ subject.change(fn=change_subsubject, inputs=[subject, physics_subject], outputs=subsubject)
167
+ physics_subject.change(fn=change_subsubject, inputs=[subject, physics_subject], outputs=subsubject)
168
+
169
+ interest = gr.Textbox(label="A natural language description of what you are interested in. We will generate relevancy scores (1-10) and explanations for the papers in the selected topics according to this statement.", info="Press shift-enter or click the button below to update.", lines=7)
170
+ sample_btn = gr.Button("Generate Digest")
171
+ sample_output = gr.Textbox(label="Results for your configuration.", info="For runtime purposes, this is only done on a small subset of recent papers in the topic you have selected. Papers will not be filtered by relevancy, only sorted on a scale of 1-10.")
172
+ with gr.Column(scale=0.40):
173
+ with gr.Box():
174
+ title = gr.Markdown(
175
+ """
176
+ # Email Setup, Optional
177
+ Send an email to the below address using the configuration on the right. Requires a sendgrid token. These values are not needed to use the right side of this page.
178
+
179
+ To create a scheduled job for this, see our [Github Repository](https://github.com/AutoLLM/ArxivDigest)
180
+ """,
181
+ interactive=False, show_label=False)
182
+ email = gr.Textbox(label="Email address", type="email", placeholder="")
183
+ sendgrid_token = gr.Textbox(label="SendGrid API Key", type="password")
184
+ with gr.Row():
185
+ test_btn = gr.Button("Send email")
186
+ output = gr.Textbox(show_label=False, placeholder="email status")
187
+ test_btn.click(fn=test, inputs=[email, subject, physics_subject, subsubject, interest, sendgrid_token], outputs=output)
188
+ token.change(fn=register_openai_token, inputs=[token])
189
+ sample_btn.click(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
190
  subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
191
  physics_subject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
192
  subsubject.change(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
193
  interest.submit(fn=sample, inputs=[email, subject, physics_subject, subsubject, interest], outputs=sample_output)
194
 
195
+ demo.launch(show_api=False)
src/utils.py CHANGED
@@ -15,6 +15,7 @@ import copy
15
 
16
  StrOrOpenAIObject = Union[str, openai_object.OpenAIObject]
17
 
 
18
  openai_org = os.getenv("OPENAI_ORG")
19
  if openai_org is not None:
20
  openai.organization = openai_org
 
15
 
16
  StrOrOpenAIObject = Union[str, openai_object.OpenAIObject]
17
 
18
+
19
  openai_org = os.getenv("OPENAI_ORG")
20
  if openai_org is not None:
21
  openai.organization = openai_org