Spaces:
Runtime error
Runtime error
shubhamagarwal92
committed on
Commit
•
de4cd5c
1
Parent(s):
0abb1ea
Initial commit
Browse files- app.py +611 -0
- requirements.txt +5 -0
- resources/download.png +0 -0
- resources/prompts.json +11 -0
app.py
ADDED
@@ -0,0 +1,611 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://huggingface.co/spaces/MAGAer13/mPLUG-Owl/blob/main/app.py
|
2 |
+
# https://huggingface.co/spaces/MAGAer13/mPLUG-Owl2
|
3 |
+
# https://github.com/allenai/s2-folks/blob/main/examples/python/find_and_recommend_papers/find_papers.py
|
4 |
+
# https://www.gradio.app/guides/creating-a-chatbot-fast
|
5 |
+
# https://huggingface.co/spaces/librarian-bots/recommend_similar_papers/blob/main/app.py
|
6 |
+
# https://huggingface.co/spaces/badayvedat/LLaVA
|
7 |
+
"""
|
8 |
+
This file demos a simple chatbot based on gradio and openai api
|
9 |
+
"""
|
10 |
+
import pathlib, json
|
11 |
+
import time
|
12 |
+
import gradio as gr
|
13 |
+
import os
|
14 |
+
import re
|
15 |
+
import argparse
|
16 |
+
import requests
|
17 |
+
import openai
|
18 |
+
from typing import Any
|
19 |
+
import datetime
|
20 |
+
import pandas as pd
|
21 |
+
from evaluate import load
|
22 |
+
|
23 |
+
# Set openai credentials
|
24 |
+
# API credentials are read once, at import time, from the process environment.
openai.api_key = os.getenv("OPENAI_API_KEY")
S2_API_KEY = os.environ.get('S2_API_KEY')
|
26 |
+
|
27 |
+
# Function to set the OpenAI API key
|
28 |
+
def set_apikey(api_key):
    """
    Store the given key in the ``OPENAI_API_KEY`` environment variable.

    A key that is already present in the environment is left untouched.

    :param api_key: OpenAI API key entered by the user
    :return: fixed confirmation message
    """
    # setdefault only writes when the variable is absent.
    os.environ.setdefault('OPENAI_API_KEY', api_key)
    return "OpenAI API key is Set"
|
32 |
+
|
33 |
+
|
34 |
+
def get_conv_log_filename():
    """
    Return the path of today's conversation log file.

    The ``logs`` directory next to this file is created if it does not exist.

    :return: path of the form ``<dir of this file>/logs/YYYY-MM-DD-conv.json``
    """
    here = pathlib.Path(__file__).parent.resolve()
    log_dir = f"{here}/logs/"
    os.makedirs(log_dir, exist_ok=True)
    today = datetime.datetime.now()
    file_name = f"{today.year}-{today.month:02d}-{today.day:02d}-conv.json"
    return os.path.join(log_dir, file_name)
|
41 |
+
|
42 |
+
|
43 |
+
def vote_last_response(state, vote_type, request: gr.Request):
    """
    Append one vote record, as a JSON line, to today's conversation log.

    :param state: value to log alongside the vote (must be JSON serialisable)
    :param vote_type: label of the vote, e.g. "upvote" or "downvote"
    :param request: Gradio request; its client host is logged as "ip"
    """
    with open(get_conv_log_filename(), "a") as log_file:
        record = dict(
            tstamp=round(time.time(), 4),
            type=vote_type,
            state=state,
            ip=request.client.host,
        )
        log_file.write(f"{json.dumps(record)}\n")
|
52 |
+
|
53 |
+
|
54 |
+
def upvote_last_response(state, request: gr.Request):
    """
    Log an "upvote" for the given state.

    :return: empty string (the UI wiring writes it into the abstract textbox)
    """
    vote_last_response(state, "upvote", request)
    cleared_text = ""
    return cleared_text
|
58 |
+
|
59 |
+
|
60 |
+
def downvote_last_response(state, request: gr.Request):
    """
    Log a "downvote" for the given state.

    :return: empty string (the UI wiring writes it into the abstract textbox)
    """
    vote_last_response(state, "downvote", request)
    cleared_text = ""
    return cleared_text
|
64 |
+
|
65 |
+
|
66 |
+
# example_abstract = """We explore the zero-shot abilities of recent large language models (LLMs) for the task of writing the literature review of a scientific research paper conditioned on its abstract and the content of related papers.
|
67 |
+
# We propose and examine a novel strategy for literature review generation with an LLM in which we first generate a plan for the review, and then use it to generate the actual text. While modern LLMs can easily be trained or prompted to
|
68 |
+
# condition on all abstracts of papers to be cited to generate a literature review without such intermediate plans, our empirical study shows that these intermediate plans improve the quality of generated literature reviews over vanilla
|
69 |
+
# zero-shot generation. Furthermore, we also create a new test corpus consisting of recent arXiv papers (with full content) posted after both open-sourced and closed-sourced LLMs that were used in our study were released. This allows us
|
70 |
+
# to ensure that our zero-shot experiments do not suffer from test set contamination.
|
71 |
+
# """
|
72 |
+
|
73 |
+
# Short sample abstract offered as a clickable example in the UI.
example_abstract = (
    "We explore the zero-shot abilities of recent large language models (LLMs) "
    "for the task of writing the literature review of a scientific research paper "
    "conditioned on its abstract and the content of related papers."
)

# The same example wrapped in a left-aligned HTML block.
examples_html = ["<div style='text-align: left;'>" + example_abstract + "</div>"]

# Left-aligned heading for the examples section.
custom_html = "<div style='text-align: left;'>Examples:</div>"
|
80 |
+
|
81 |
+
|
82 |
+
# Page header: logo plus title. The stray comma that followed the ``src``
# attribute of the <img> tag was removed; attributes are space separated.
title_markdown = ("""
<h1 align="center"><a href=""><img src="/file=resources/download.png" alt="Writing Assistant - LitGen" border="0" style="margin: 0 auto; height: 50px;" /></a> </h1>
<h2 align="center">ReviewGPT - Literature review generation for research idea</h2>
""")
|
86 |
+
# <h5 align="center"> If you like our project, please give us a star ✨ on GitHub for the latest update. </h5>
|
87 |
+
|
88 |
+
# Terms-of-use notice rendered at the bottom of the page.
tos_markdown = """
### Terms of use
By using this service, users are required to agree to the following terms:
The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user data for future research.
For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
"""


# License notice rendered below the terms of use.
learn_more_markdown = """
### License
The service is a research preview intended for non-commercial use only, subject to the [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
"""


# Custom CSS handed to gr.Blocks.
block_css = """

#buttons button {
    min-width: min(120px,100%);
}

#display_mrkdwn {
    display: block;
    border-width: var(--block-border-width);
    border-color: var(--block-border-color);
    border-radius: var(--block-radius);
    background: var(--block-background-fill);
    padding: var(--input-padding);
}

.gallery.svelte-1viwdyg {
    text-align: justify;
}

"""
|
123 |
+
|
124 |
+
def parse_arxiv_id_from_paper_url(url):
    """
    Extract the arXiv identifier from an arXiv abstract or PDF URL.

    Surrounding whitespace, a query string, a fragment and a trailing slash
    are ignored, so ``https://arxiv.org/abs/2010.14235/`` no longer yields an
    empty identifier. A trailing ``.pdf`` is removed. A version suffix such as
    ``v2`` is kept as part of the identifier.

    :param url: URL (or bare identifier) of the paper
    :return: last path component of the URL without the ``.pdf`` extension
    """
    # Cut off "#fragment" and "?query" before looking at the path.
    path = url.strip().split("#", 1)[0].split("?", 1)[0]
    arxiv_id = path.rstrip("/").split("/")[-1]
    if arxiv_id.endswith(".pdf"):
        arxiv_id = arxiv_id[:-4]
    return arxiv_id
|
129 |
+
|
130 |
+
|
131 |
+
def load_json(path: str) -> Any:
    """
    Read a UTF-8 encoded JSON file and return its parsed content.

    :param path: Path to the JSON file
    :type path: str
    :return: the deserialised JSON document
    """
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)
|
144 |
+
|
145 |
+
|
146 |
+
def load_all_prompts(file_path: str = None) -> dict:
    """
    Load the prompt templates from a JSON file.

    :param file_path: path of the prompts file; when empty or None, the
        ``resources/prompts.json`` file next to this module is used
    :return: the parsed content of the prompts file
    """
    if not file_path:
        # Default location, resolved relative to this file.
        cur_dir = pathlib.Path(__file__).parent.resolve()
        file_path = f"{cur_dir}/resources/prompts.json"
    return load_json(file_path)
|
161 |
+
|
162 |
+
|
163 |
+
def run_open_ai_api(json_data, model_name="gpt-4", max_tokens: int = 500, temperature: float = 0.2) -> str:
    """
    Send one user prompt to the OpenAI chat completion endpoint.

    :param json_data: mapping whose "prompt" entry is sent as the user message
    :param model_name: chat model to use (e.g. gpt-4-32k, gpt-4-1106-preview)
    :param max_tokens: forwarded as ``max_tokens``
    :param temperature: forwarded as ``temperature``
    :return: content of the first returned choice
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"{json_data['prompt']}"},
    ]
    completion = openai.ChatCompletion.create(
        model=model_name,
        max_tokens=max_tokens,
        temperature=temperature,
        messages=chat_messages,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
|
187 |
+
|
188 |
+
|
189 |
+
|
190 |
+
def format_results_into_markdown(recommendations):
    """
    Render the retrieved papers as a numbered list of markdown links.

    :param recommendations: iterable of paper dicts with the keys "title",
        "url", "year" and "citationCount"
    :return: header line followed by one ``[n] [title](url) (year) Cited by c``
        line per paper
    """
    header = "The following papers were found by the Semantic Scholar API \n\n"
    entries = [
        f"[{rank}] [{paper['title']}]({paper['url']}) ({paper['year']}) Cited by {paper['citationCount']}\n"
        for rank, paper in enumerate(recommendations, start=1)
    ]
    return header + "".join(entries)
|
197 |
+
|
198 |
+
def find_basis_paper(query, num_papers_api=20):
    """
    Search Semantic Scholar for papers matching a keyword query.

    :param query: free-text search query
    :param num_papers_api: maximum number of papers to request
    :return: list of paper dicts; an empty list when nothing matched
    :raises requests.HTTPError: when the API answers with an error status
    """
    fields = 'title,url,abstract,citationCount,journal,isOpenAccess,fieldsOfStudy,year,journal'
    rsp = requests.get('https://api.semanticscholar.org/graph/v1/paper/search',
                       headers={'X-API-KEY': S2_API_KEY},
                       params={'query': query, 'limit': num_papers_api, 'fields': fields},
                       timeout=30)  # never hang the UI on a stalled connection
    rsp.raise_for_status()
    results = rsp.json()
    total = results.get("total")
    if not total:
        print('No matches found. Please try another query.')
        # Stop here: previously execution fell through to results['data'].
        return []

    print(f'Found {total} results. Showing up to {num_papers_api}.')
    return results.get('data', [])
|
213 |
+
|
214 |
+
def get_recommendations_from_semantic_scholar(url: str, num_papers_api=20):
    """
    Ask the Semantic Scholar recommendations API for papers similar to a seed paper.

    https://www.semanticscholar.org/product/api/tutorial

    :param url: arXiv URL of the seed paper
    :param num_papers_api: maximum number of recommendations to request
    :return: list of recommended paper dicts (empty when none are returned)
    :raises requests.HTTPError: when the API answers with an error status
    """
    fields = 'title,url,abstract,citationCount,journal,isOpenAccess,fieldsOfStudy,year,journal'
    arxiv_id = parse_arxiv_id_from_paper_url(url)
    query_id = f"ArXiv:{arxiv_id}"

    rsp = requests.post(
        "https://api.semanticscholar.org/recommendations/v1/papers/",
        # Same key header as the other Semantic Scholar calls in this file.
        headers={'X-API-KEY': S2_API_KEY},
        json={
            "positivePaperIds": [query_id],
        },
        params={"fields": fields, "limit": num_papers_api},
        timeout=30,
    )
    # Surface HTTP errors instead of indexing into an error payload.
    rsp.raise_for_status()
    results = rsp.json()
    return results.get('recommendedPapers', [])
|
233 |
+
|
234 |
+
def get_paper_data(paper_url):
    """
    Retrieve the Semantic Scholar record of one paper, looked up by URL.

    :param paper_url: URL of the paper
    :return: dict with the requested paper fields
    :raises requests.HTTPError: when the lookup fails, so that an error
        payload is never handed back as if it were a paper
    """
    fields = 'title,url,abstract,citationCount,journal,isOpenAccess,fieldsOfStudy,year,journal'
    rsp = requests.get(f"https://api.semanticscholar.org/graph/v1/paper/URL:{paper_url}",
                       headers={'X-API-KEY': S2_API_KEY},
                       params={'fields': fields},
                       timeout=30)
    rsp.raise_for_status()
    return rsp.json()
|
244 |
+
|
245 |
+
|
246 |
+
def sort_papers(papers, sort_by):
    """
    Order papers by the chosen criterion.

    "Citations" and "Year" sort in descending order with papers lacking the
    value placed last; ties keep their incoming order. Any other value
    (e.g. "Relevance") keeps the incoming order.

    Plain Python sorting replaces the earlier DataFrame round trip, which
    turned integer years/citation counts into floats as soon as one value was
    missing and raised ``KeyError`` on an empty list.

    :param papers: list of paper dicts
    :param sort_by: one of "Relevance", "Citations", "Year"
    :return: new list of new dicts; every record carries every key seen in
        any input record (missing ones are set to None)
    """
    # Give all records the same set of keys, in first-seen order.
    all_keys = []
    for paper in papers:
        for key in paper:
            if key not in all_keys:
                all_keys.append(key)
    records = [{key: paper.get(key) for key in all_keys} for paper in papers]

    sort_field = {"Citations": "citationCount", "Year": "year"}.get(sort_by)
    if sort_field is None:
        return records

    with_value = [rec for rec in records if rec.get(sort_field) is not None]
    without_value = [rec for rec in records if rec.get(sort_field) is None]
    # reverse=True keeps the relative order of equal elements.
    with_value.sort(key=lambda rec: rec[sort_field], reverse=True)
    return with_value + without_value
|
257 |
+
|
258 |
+
|
259 |
+
def get_markdown_query_text(papers):
    """
    Build both textual views of the retrieved papers.

    :param papers: list of paper dicts
    :return: tuple ``(markdown list for display, numbered abstracts for the prompt)``
    """
    return format_results_into_markdown(papers), format_abstracts_as_references(papers)
|
263 |
+
|
264 |
+
|
265 |
+
def filter_recommendations(recommendations, max_paper_count=5):
    """
    Keep only recommendations that have an arXiv identifier.

    Records whose "externalIds" entry is missing or None are skipped instead
    of raising.

    :param recommendations: list of paper dicts
    :param max_paper_count: maximum number of papers to return
    :return: the first ``max_paper_count`` papers having an "ArXiv" external id
    """
    arxiv_papers = [
        rec for rec in recommendations
        if (rec.get("externalIds") or {}).get("ArXiv") is not None
    ]
    return arxiv_papers[:max_paper_count]
|
273 |
+
|
274 |
+
# def format_recommendation_into_markdown(arxiv_id, recommendations):
|
275 |
+
# comment = "The following papers were recommended by the Semantic Scholar API \n\n"
|
276 |
+
# for r in recommendations:
|
277 |
+
# hub_paper_url = f"https://huggingface.co/papers/{r['externalIds']['ArXiv']}"
|
278 |
+
# comment += f"* [{r['title']}]({hub_paper_url}) ({r['year']})\n"
|
279 |
+
# return comment
|
280 |
+
|
281 |
+
|
282 |
+
def format_abstracts_as_references(papers):
    """
    Render the abstracts as numbered references for the LLM prompt.

    A paper without a usable abstract (missing, None, NaN or blank) gets the
    placeholder "No abstract available." so that the literal "None"/"nan" is
    never presented to the model as the content of a reference.

    :param papers: list of paper dicts
    :return: one ``[n]: abstract`` line per paper; "" for an empty list
    """
    lines = []
    for number, paper in enumerate(papers, start=1):
        abstract = paper.get('abstract')
        if not isinstance(abstract, str) or not abstract.strip():
            abstract = "No abstract available."
        lines.append(f"[{number}]: {abstract}\n")
    return "".join(lines)
|
290 |
+
|
291 |
+
def format_prompt(base_prompt, abstract, cite_text, plan=""):
    """
    Assemble the generation prompt: instructions followed by the fenced data.

    :param base_prompt: instruction text
    :param abstract: abstract of the user's paper
    :param cite_text: numbered reference abstracts
    :param plan: optional sentence plan, appended when non-empty
    :return: the complete prompt
    """
    payload = f"Abstract: {abstract} \n {cite_text}"
    if plan:
        payload = f"{payload} \n Plan: {plan}"
    return f"{base_prompt}\n```{payload}```"
|
298 |
+
|
299 |
+
def get_complete_prompt_for_summarization(base_prompt, data):
    """
    Build the prompt that asks the LLM for search keywords for the S2 API.

    :param base_prompt: instruction text
    :param data: abstract to summarise
    :return: instructions followed by the fenced abstract
    """
    fenced_abstract = f"```Abstract: {data}```"
    return f"{base_prompt}\n{fenced_abstract}"
|
305 |
+
|
306 |
+
def check_matching_paper(wer, abstract, papers, check_papers: int = 3, wer_threshold=0.12):
    """
    Drop the retrieved paper whose abstract is (nearly) the user's own abstract.

    The word error rate between the user's abstract and each of the first
    ``check_papers`` retrieved abstracts is computed; the first paper scoring
    below ``wer_threshold`` is removed from ``papers`` in place.

    Fewer than ``check_papers`` papers, papers without an abstract, and an
    empty user abstract are handled instead of raising.

    :param wer: metric object exposing ``compute(predictions=..., references=...)``
    :param abstract: abstract entered by the user
    :param papers: list of paper dicts; mutated when a match is found
    :param check_papers: how many of the leading papers to inspect
    :param wer_threshold: scores strictly below this count as a match
    :return: the (possibly shortened) ``papers`` list
    """
    if not abstract:
        return papers

    references = [abstract]
    # Iterating over a slice (a copy) makes the pop below safe.
    for index, paper in enumerate(papers[:check_papers]):
        candidate = paper.get('abstract')
        if not isinstance(candidate, str) or not candidate.strip():
            continue
        wer_score = wer.compute(predictions=[candidate], references=references)
        if wer_score < wer_threshold:
            papers.pop(index)
            return papers
    return papers
|
319 |
+
|
320 |
+
|
321 |
+
class GradioChatApp:
    """
    Gradio chat application that retrieves related papers from Semantic
    Scholar and asks an OpenAI chat model to write a related-work section.
    """

    def __init__(self):
        """Load the prompt templates and the word-error-rate metric."""
        self.name = "GradioChatApp"
        self.prompts = load_all_prompts()
        self.role_template = self.prompts["role_template"]
        self.plan_prompt = self.prompts["plan_template"]
        self.vanilla_prompt = self.prompts["vanilla_template"]
        self.sample_plan = self.prompts["plan"]
        self.summarization_prompt = self.prompts["summarization_template"]
        self.ranking_prompt = self.prompts["ranking_template"]
        self.wer = load("wer")

    @staticmethod
    def _apply_llm_ranking(papers, response):
        """
        Reorder ``papers`` according to the 1-based ranks found in ``response``.

        The expected answer looks like ``[1] > [2] > [4] > [3]``. Numbers that
        are out of range or repeated are ignored, and papers the model did not
        mention are appended in their current order, so no paper is lost or
        duplicated.

        :param papers: list of paper dicts in their current order
        :param response: raw text returned by the LLM
        :return: new list with the same papers in ranked order
        """
        ranked, used = [], set()
        for token in re.findall(r'\d+', response):
            position = int(token) - 1
            if 0 <= position < len(papers) and position not in used:
                used.add(position)
                ranked.append(papers[position])
        ranked.extend(paper for position, paper in enumerate(papers) if position not in used)
        return ranked

    def add_text(self, history, text, base_paper_textbox, keyword_textbox, rerank: bool = True,
                 num_papers: int = 3, model_name="gpt-4", sort_by="relevance", temperature=0.2,
                 max_tokens=300, num_papers_api: int = 20):
        """
        Record the user's abstract in the chat and retrieve the papers to cite.

        Retrieval strategy: seed paper recommendations if a seed paper is
        given, else a keyword search using the given keywords, else a keyword
        search using keywords summarised from the abstract by the LLM.

        :param history: chat history (list of message pairs) or None
        :param text: abstract entered by the user
        :param base_paper_textbox: optional URL of a seed paper
        :param keyword_textbox: optional search keywords
        :param rerank: True or the string "True" (as sent by the radio
            button) enables LLM re-ranking
        :param num_papers: number of papers to keep
        :param model_name: OpenAI chat model
        :param sort_by: "Relevance", "Citations" or "Year"
        :param temperature: sampling temperature for the LLM calls
        :param max_tokens: token limit for the LLM calls
        :param num_papers_api: number of papers requested from Semantic Scholar
        :return: tuple ``(history, text, display markdown, reference text,
            base_paper_textbox, keyword_textbox)``
        :raises gr.Error: when no OpenAI API key is configured
        """
        if 'OPENAI_API_KEY' not in os.environ:
            raise gr.Error('Upload your OpenAI API key')

        history = list(history or []) + [(f"User provided abstract: \n {text}", None)]
        try:
            if base_paper_textbox:
                hist_response = f"Finding recommendations from S2 API based on the paper \n {base_paper_textbox}"
                papers = get_recommendations_from_semantic_scholar(base_paper_textbox, num_papers_api)
            else:
                if keyword_textbox:
                    query = keyword_textbox
                else:
                    prompt = get_complete_prompt_for_summarization(self.summarization_prompt, text)
                    json_data = {"prompt": prompt}
                    query = run_open_ai_api(json_data, model_name=model_name, max_tokens=max_tokens, temperature=temperature)
                hist_response = f"LLM summarized keyword query to be used for S2 API: \n {query}"
                papers = find_basis_paper(query, num_papers_api)
        except Exception as err:
            # Best effort: report to the user, keep the reason in the server log.
            print(f"Paper retrieval failed: {err!r}")
            history = history + [("No papers found using S2. Try providing keywords or a seed paper!", None)]
            return history, "", "No papers found", "", "", ""
        if not papers:
            history = history + [("No papers found using S2. Try providing keywords or changing seed paper!", None)]
            return history, "", "No papers found", "", "", ""
        history = history + [(hist_response, None)]

        try:
            papers = check_matching_paper(self.wer, text, papers)
        except Exception as err:
            print(f"WER failed: {err!r}")
        papers = sort_papers(papers, sort_by)
        display_markdown, cite_text = get_markdown_query_text(papers)

        # The radio button sends the strings "True"/"False"; the declared
        # default is the bool True. Accept both spellings.
        if str(rerank) == "True":
            try:
                complete_prompt = format_prompt(base_prompt=f"{self.role_template} {self.ranking_prompt}", abstract=text, cite_text=cite_text)
                json_data = {"prompt": complete_prompt}
                response = run_open_ai_api(json_data, model_name=model_name, max_tokens=max_tokens, temperature=temperature)
                # Expected answer: [1] > [2] > [4] > [3] > [6] > [5]
                papers = self._apply_llm_ranking(papers, response)
            except Exception as err:
                print(f"LLM not able to rerank! {err!r}")

        # If the papers are based on a seed paper, insert it at index 0.
        if base_paper_textbox:
            try:
                base_paper_data = get_paper_data(paper_url=base_paper_textbox)
                papers.insert(0, base_paper_data)
            except Exception as err:
                print(f"Cant retrieve data for base paper! {err!r}")
        papers = papers[:int(num_papers)]
        display_markdown, cite_text = get_markdown_query_text(papers)

        return history, text, display_markdown, cite_text, base_paper_textbox, keyword_textbox

    def bot(self, history, cite_text, text, plan_textbox, request: gr.Request, model_name="gpt-4",
            temperature=0.2, max_tokens=300, regenerate: bool = False):
        """
        Generate the related-work text and stream it into the chat.

        This is a generator: it yields the updated history after every
        character so the answer appears progressively.

        :param history: chat history; the reply is written into the bot slot
            of its last entry
        :param cite_text: numbered reference abstracts produced by ``add_text``
        :param text: abstract entered by the user
        :param plan_textbox: optional sentence plan
        :param request: Gradio request (currently unused)
        :param model_name: OpenAI chat model
        :param temperature: sampling temperature
        :param max_tokens: token limit, also substituted into the vanilla prompt
        :param regenerate: currently unused
        :raises gr.Error: when no OpenAI API key is configured
        """
        if 'OPENAI_API_KEY' not in os.environ:
            raise gr.Error('Upload your OpenAI API key')

        if not cite_text:
            # Nothing was retrieved. A plain ``return "..."`` inside a
            # generator is discarded, so the message is yielded as a reply.
            yield list(history or []) + [[None, "How may I help?"]]
            return

        if plan_textbox:
            complete_prompt = format_prompt(base_prompt=self.plan_prompt, abstract=text, cite_text=cite_text, plan=plan_textbox)
        else:
            # Format into a local: overwriting self.vanilla_prompt would
            # remove the {max_tokens} placeholder for all later calls.
            vanilla_prompt = self.vanilla_prompt.format(max_tokens=max_tokens)
            complete_prompt = format_prompt(base_prompt=vanilla_prompt, abstract=text, cite_text=cite_text, plan="")

        json_data = {"prompt": complete_prompt}
        response = run_open_ai_api(json_data, model_name=model_name, max_tokens=max_tokens, temperature=temperature)

        if not history:
            # E.g. regenerate after the chat was cleared.
            history = [[None, ""]]
        # Rebuild the last row as a list so it can be mutated even if it
        # arrived as a tuple.
        history[-1] = [history[-1][0], ""]
        for character in response:
            history[-1][1] += str(character)
            time.sleep(0.005)
            yield history

    def launch_app(self):
        """
        Build the Gradio interface, wire its events and launch the server.
        """
        # Close all apps running on servers
        gr.close_all()
        textbox = gr.Textbox(lines=2, show_label=False, placeholder="Enter the abstract of your paper", container=False)
        plan_textbox = gr.Textbox(show_label=False, placeholder="Enter sentence plan (Default none). Example: Cite [1] on line 2.", container=False)
        base_paper_textbox = gr.Textbox(show_label=False, placeholder="Provide link of most relevant paper", container=False)
        keyword_textbox = gr.Textbox(show_label=False, placeholder="Enter optional keywords for querying (Default none)", container=False)

        with gr.Blocks(title="Writing Assistant", theme=gr.themes.Default(), css=block_css) as demo:
            # Holds the reference text produced by add_text for later generation.
            prompt = gr.State()

            gr.Markdown(title_markdown)
            with gr.Accordion("How to use (click to expand)", open=False):
                gr.Markdown(
                    """
                    Search and write literature review for your research idea/proposal or a draft abstract with this powerful AI tool.
                    TLDR; We query Semantic Scholar (S2) to retrieve relevant papers and optionally rerank them using another LLM.
                    With the principles of Retrieval Augmented Generation, LLM generates the related work section for your paper.

                    There are three strategies for AI search:
                    * We summarize your abstract with GPT-4 to get keywords which are then used to search S2
                    * You provide keywords that could be used as a search query
                    * Provide a seed paper used for recommendation

                    For generation, you could also provide a sentence plan to the LLM which contains the number of sentences and citations to produce
                    """
                )

            with gr.Row():
                with gr.Column(scale=3):
                    # TODO: OpenAI key entry (textbox + "Change Key" button wired to set_apikey)

                    with gr.Accordion("Ranking Parameters", open=False):
                        # https://platform.openai.com/docs/models/overview
                        model_name = gr.Dropdown(["gpt-3.5-turbo", "gpt-4", "gpt-4-32k", "gpt-4-1106-preview"], value="gpt-4", interactive=True, label="Model")
                        num_papers = gr.Slider(minimum=1, maximum=10, value=4, step=1, interactive=True, label="Cite # papers")
                        sort_by = gr.Dropdown(["Relevance", "Citations", "Year"], value="Relevance", interactive=True, label="Sort by")
                        llm_rerank = gr.Radio(choices=["True", "False"], value="True", interactive=True, label="LLM Re-rank (May override sorting)")
                        with gr.Row():
                            temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature", scale=1)
                            max_tokens = gr.Slider(minimum=0, maximum=3000, value=800, step=64, interactive=True, label="Max output tokens", scale=2)
                    display_1 = gr.Markdown(value="Retrieved papers", label="Retrieved papers!", elem_id="display_mrkdwn")

                with gr.Column(scale=8):
                    chatbot = gr.Chatbot(elem_id="Chatbot", label="ReviewGPT", height=480)

            with gr.Row():
                with gr.Column(scale=3):

                    with gr.Accordion("Example", open=False):
                        gr.Examples(label="Example: Abstract", examples=[
                            [example_abstract],
                        ], inputs=[textbox], elem_id="example_abstract")
                        gr.Examples(label="Example: Query keywords (Optional)", examples=[
                            ["multi document summarization of scientific articles"],
                        ], inputs=[keyword_textbox])
                        gr.Examples(label="Example: Most relevant paper (Optional)", examples=[
                            ["https://arxiv.org/abs/2010.14235"],
                        ], inputs=[base_paper_textbox])
                        gr.Examples(label="Example: Sentence plan (Optional)", examples=[
                            [self.sample_plan],
                        ], inputs=[plan_textbox])

                with gr.Column(scale=8):

                    with gr.Row():
                        with gr.Column(scale=6):
                            textbox.render()
                        with gr.Column(scale=1, min_width=50):
                            submit_btn = gr.Button(value="Send", variant="primary")
                    with gr.Row():
                        gr.Markdown("""Optionally, improve the API Search by either providing keywords or a very relevant seed paper. Seed paper takes priority if provided both.""")
                    with gr.Row():
                        with gr.Column(scale=2):
                            keyword_textbox.render()
                        with gr.Column(scale=2):
                            base_paper_textbox.render()
                    with gr.Row():
                        gr.Markdown("""Optionally, provide a sentence plan to be used for generation""")
                    with gr.Row():
                        with gr.Column(scale=5):
                            plan_textbox.render()
                        with gr.Column(scale=2, min_width=50):
                            plan_generate_btn = gr.Button(value="Regenerate with plan", variant="primary")
                    with gr.Row(elem_id="buttons"):
                        # Emoji restored from mis-decoded UTF-8.
                        upvote_btn = gr.Button(value="👍 Upvote")
                        downvote_btn = gr.Button(value="👎 Downvote")
                        regenerate_btn = gr.Button(value="🔄 Regenerate")
                        clear_btn = gr.Button(value="🗑️ Clear")

            gr.Markdown(tos_markdown)
            gr.Markdown(learn_more_markdown)

            # Component lists shared by every event handler below.
            retrieval_inputs = [chatbot, textbox, base_paper_textbox, keyword_textbox, llm_rerank,
                                num_papers, model_name, sort_by, temperature, max_tokens]
            retrieval_outputs = [chatbot, textbox, display_1, prompt, base_paper_textbox, keyword_textbox]
            generation_inputs = [chatbot, prompt, textbox, plan_textbox, model_name, temperature, max_tokens]

            # Pressing Enter in the abstract box and clicking "Send" do the
            # same thing: retrieve papers, then generate the review.
            for trigger in (textbox.submit, submit_btn.click):
                trigger(
                    self.add_text,
                    retrieval_inputs,
                    retrieval_outputs,
                    queue=False
                ).then(
                    self.bot,
                    generation_inputs,
                    [chatbot]
                )

            plan_generate_btn.click(self.bot, generation_inputs, [chatbot])
            upvote_btn.click(upvote_last_response, prompt, [textbox], queue=False)
            downvote_btn.click(downvote_last_response, prompt, [textbox], queue=False)

            regenerate_btn.click(self.bot, generation_inputs, [chatbot])
            clear_btn.click(lambda: None, None, chatbot, queue=False)

        demo.launch(allowed_paths=["resources/"])
|
593 |
+
|
594 |
+
|
595 |
+
if __name__ == "__main__":
    # Command-line options are declared here but never parsed; the app is
    # always started with the settings hard-coded in launch_app().
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", type=str, default="0.0.0.0")
    cli.add_argument("--debug", action="store_true", help="using debug mode")
    cli.add_argument("--port", type=int)
    cli.add_argument("--concurrency-count", type=int, default=1)

    GradioChatApp().launch_app()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
evaluate==0.4.0
|
2 |
+
jiwer==3.0.3
|
3 |
+
gradio==4.5.0
|
4 |
+
gradio_client==0.7.0
|
5 |
+
openai==0.27.8
|
resources/download.png
ADDED
resources/prompts.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"vanilla_template": "You will be provided with an abstract of a scientific document and other references papers in triple quotes. Your task is to write the related work section of the document using only the provided abstracts and other references papers. Please write the related work section creating a cohesive storyline by doing a critical analysis of prior work comparing the strengths and weaknesses while also motivating the proposed approach. You should cite the other related documents as [#] whenever you are referring it in the related work. Do not write it as Reference #. Do not cite abstract. Do not include any extra notes or newline characters at the end. Do not copy the abstracts of reference papers directly but compare and contrast to the main work concisely. Do not provide the output in bullet points. Do not provide references at the end. Please cite all the provided reference papers. Provide the output in maximum {max_tokens} words.",
|
3 |
+
"plan_template": "You will be provided with an abstract of a scientific document and other references papers in triple quotes. Your task is to write the related work section of the document using only the provided abstracts and other references papers. Please write the related work section creating a cohesive storyline by doing a critical analysis of prior work comparing the strengths and weaknesses while also motivating the proposed approach. You are also provided a sentence plan mentioning the total number of lines and the citations to refer in different lines. You should cite all the other related documents as [#] whenever you are referring it in the related work. Do not cite abstract. Do not include any extra notes or newline characters at the end. Do not copy the abstracts of reference papers directly but compare and contrast to the main work concisely. Do not provide the output in bullet points. Do not provide references at the end. Please cite all the provided reference papers. Please follow the plan when generating sentences, especially the number of lines to generate.",
|
4 |
+
"plan": "Generate the output in 200 words using 5 sentences. Cite [1] on line 2. Cite [2], [3] on line 3. Cite [4] on line 5.",
|
5 |
+
"cite_plan": "Please generate the output in 200 words using 5 sentences. Cite @cite_1 on line 2. Please cite @cite_2, @cite_3 on line 3",
|
6 |
+
"summarization_template": "You will be provided with an abstract of a scientific document. Your task is to summarize the abstract in 4 words to search for related papers using Semantic Scholar API",
|
7 |
+
"ranking_template": "You will be provided with an abstract or an idea of a scientific document and abstracts of some other relevant papers. Your task is to rank the papers based on the relevance to the query abstract. If the reference paper matches the query abstract completely, provide it a lower rank. Provide only the ranks as [] > [] > [].",
|
8 |
+
"role_template": "You are a helpful research assistant who is helping with literature review of a research idea."
|
9 |
+
}
|
10 |
+
|
11 |
+
|