Max Hager committed on
Commit
f4a4f9c
1 Parent(s): 8621a57
__pycache__/code.cpython-311.pyc DELETED
Binary file (9.89 kB)
 
app.py CHANGED
@@ -1,6 +1,4 @@
1
  import gradio as gr
2
- from PIL import Image, ImageDraw, ImageFont
3
- import textwrap
4
  import requests
5
  from PIL import Image
6
  from io import BytesIO
@@ -169,5 +167,16 @@ def fetch_arxiv_image(arxiv_link):
169
  output_path = create_image_from_url(arxiv_id)
170
  return Image.open(output_path)
171
 
172
- demo = gr.Interface(fn=fetch_arxiv_image, inputs="text", outputs="image")
 
 
 
 
 
 
 
 
 
 
 
173
  demo.launch()
 
1
  import gradio as gr
 
 
2
  import requests
3
  from PIL import Image
4
  from io import BytesIO
 
167
  output_path = create_image_from_url(arxiv_id)
168
  return Image.open(output_path)
169
 
170
+ description_text = (
171
+ "It will only work with an arXiv link. Based on the arXiv paper, a summary of the paper is generated "
172
+ "and displayed in arXivGPT format (https://x.com/arXivGPT). Please input the arXiv link below."
173
+ )
174
+
175
+ demo = gr.Interface(
176
+ fn=fetch_arxiv_image,
177
+ inputs="text",
178
+ outputs="image",
179
+ allow_flagging="never",
180
+ description=description_text
181
+ )
182
  demo.launch()
code.py DELETED
@@ -1,166 +0,0 @@
1
- import requests
2
- import fitz # PyMuPDF
3
- import arxiv
4
- import tiktoken
5
- from openai import OpenAI
6
- import os
7
- import json
8
- from dotenv import load_dotenv
9
- from PIL import Image, ImageDraw, ImageFont
10
- import textwrap
11
- from datetime import datetime
12
-
13
- # Load environment variables from .env file
14
- load_dotenv()
15
-
16
- openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
17
-
18
- def download_and_extract_paper_info(arxiv_id, token_limit=120000, model="gpt-3.5-turbo"):
19
- search = arxiv.Search(id_list=[arxiv_id])
20
- paper = next(search.results())
21
-
22
- title = paper.title
23
- publish_date = paper.published.date()
24
-
25
- pdf_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
26
- response = requests.get(pdf_url)
27
- if response.status_code == 200:
28
- pdf_content = response.content
29
-
30
- doc = fitz.open(stream=pdf_content, filetype="pdf")
31
- text = ""
32
- encoding = tiktoken.encoding_for_model(model)
33
-
34
- for page in doc:
35
- page_text = page.get_text()
36
- text += page_text
37
-
38
- tokens = encoding.encode(text)
39
- if len(tokens) > token_limit:
40
- text = encoding.decode(tokens[:token_limit])
41
- break
42
-
43
- return {
44
- "title": title,
45
- "publish_date": publish_date,
46
- "full_text": text
47
- }
48
- else:
49
- print(f"Failed to download paper. Status code: {response.status_code}")
50
- return None
51
-
52
- def summarize_text(text):
53
- prompt = f"""
54
- You are getting the text version of an arxiv paper your goal is to provide a summary of the paper by providing bullet points which summarise the paper.
55
-
56
- It should be exact three bullet points which summarise the paper. Return your response in JSON format where the keys are the bullet points and the values are the summaries of the bullet points as following:
57
-
58
- {{
59
- "bullet_point_1": "content",
60
- "bullet_point_2": "content",
61
- "bullet_point_3": "content"
62
- }}
63
-
64
- Here is the text of the paper:
65
-
66
- {text}
67
- """
68
-
69
- completion = openai_client.chat.completions.create(
70
- model="gpt-4o-mini",
71
- response_format={ "type": "json_object" },
72
- messages=[
73
- {"role": "user", "content": prompt}
74
- ],
75
- temperature=0.0,
76
- )
77
-
78
- summary = completion.choices[0].message.content
79
- return summary
80
-
81
- def add_text_to_image(background_path, title, text_content, publish_date, output_path="output.jpg", scale_factor=2, offset=20):
82
- with Image.open(background_path) as img:
83
- width, height = img.size
84
- background = img.resize((width * scale_factor, height * scale_factor), Image.LANCZOS)
85
-
86
- draw = ImageDraw.Draw(background)
87
-
88
- title_font = ImageFont.truetype("fonts/Inika-Regular.ttf", 35 * scale_factor)
89
- content_font = ImageFont.truetype("fonts/Inika-Regular.ttf", 20 * scale_factor)
90
- date_font = ImageFont.truetype("fonts/Inika-Regular.ttf", 20 * scale_factor)
91
- arxiv_font = ImageFont.truetype("fonts/Larabieb.ttf", 50 * scale_factor)
92
-
93
- margin = 50 * scale_factor
94
- max_width = background.width - (2 * margin)
95
-
96
- # Dynamically calculate the width for wrapping the title
97
- wrapped_title = textwrap.wrap(title, width=int(max_width / (35 * scale_factor * 0.6)))
98
- y_text = 50 * scale_factor
99
-
100
- for line in wrapped_title:
101
- bbox = title_font.getbbox(line)
102
- line_width = bbox[2] - bbox[0]
103
- line_height = bbox[3] - bbox[1]
104
- x_text = (background.width - line_width) // 2
105
- draw.text((x_text, y_text), line, font=title_font, fill=(0, 0, 0))
106
- y_text += line_height + (10 * scale_factor)
107
-
108
- bullet_points = json.loads(text_content)
109
- total_height = sum(len(textwrap.wrap(value, width=90)) * (25 * scale_factor) + (20 * scale_factor) for value in bullet_points.values())
110
- y = (background.height - total_height) // 2
111
- bullet_width = content_font.getbbox("• ")[2]
112
- max_content_width = max(max(content_font.getbbox(line)[2] for line in textwrap.wrap(value, width=90)) for value in bullet_points.values())
113
- bullet_start_x = (background.width - max_content_width - bullet_width) // 2
114
-
115
- for value in bullet_points.values():
116
- wrapped_text = textwrap.wrap(value, width=90)
117
-
118
- for i, line in enumerate(wrapped_text):
119
- if i == 0:
120
- draw.text((bullet_start_x, y), "•", font=content_font, fill=(0, 0, 0))
121
- draw.text((bullet_start_x + bullet_width, y), line, font=content_font, fill=(0, 0, 0))
122
- else:
123
- draw.text((bullet_start_x + bullet_width, y + (25 * scale_factor * i)), line, font=content_font, fill=(0, 0, 0))
124
-
125
- y += (25 * scale_factor * len(wrapped_text)) + (20 * scale_factor)
126
-
127
- date_text = f"Published: {publish_date}"
128
- date_bbox = date_font.getbbox(date_text)
129
- date_height = date_bbox[3] - date_bbox[1]
130
- draw.text((margin, background.height - margin - date_height - offset), date_text, font=date_font, fill=(0, 0, 0))
131
-
132
- arxiv_text = "@arXivGPT"
133
- arxiv_bbox = arxiv_font.getbbox(arxiv_text)
134
- arxiv_width = arxiv_bbox[2] - arxiv_bbox[0]
135
- arxiv_height = arxiv_bbox[3] - arxiv_bbox[1]
136
- arxiv_x = background.width - margin - arxiv_width
137
- arxiv_y = background.height - margin - arxiv_height - offset
138
-
139
- pre_x_text = "@ar"
140
- pre_x_width = arxiv_font.getbbox(pre_x_text)[2]
141
- draw.text((arxiv_x, arxiv_y), pre_x_text, font=arxiv_font, fill=(0, 0, 0))
142
-
143
- x_text = "X"
144
- x_width = arxiv_font.getbbox(x_text)[2]
145
- draw.text((arxiv_x + pre_x_width, arxiv_y), x_text, font=arxiv_font, fill="#B31B1B")
146
-
147
- post_x_text = "ivGPT"
148
- draw.text((arxiv_x + pre_x_width + x_width, arxiv_y), post_x_text, font=arxiv_font, fill=(0, 0, 0))
149
-
150
- background.save(output_path, quality=95)
151
- print(f"High-resolution image saved as {output_path}")
152
-
153
- def create_image_from_url(arxiv_id, background_path="background.jpg", output_path="output.jpg"):
154
- paper_info = download_and_extract_paper_info(arxiv_id)
155
- if paper_info:
156
- title = paper_info.get("title")
157
- publish_date = paper_info.get("publish_date")
158
- full_text = paper_info.get("full_text")
159
- summary = summarize_text(full_text)
160
- add_text_to_image(background_path, title, summary, publish_date, output_path)
161
- return output_path
162
-
163
- # Example usage
164
- # if __name__ == "__main__":
165
- # arxiv_id = "2106.14881" # Replace with the actual arxiv_id
166
- # create_image_from_url(arxiv_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
output.jpg → example.jpg RENAMED
File without changes
flagged/log.csv DELETED
@@ -1,2 +0,0 @@
1
- arxiv_link,output,flag,username,timestamp
2
- https://arxiv.org/pdf/2408.01031,"{""path"":""flagged/output/dc780efd57f804eef677/image.JPEG"",""url"":null,""size"":null,""orig_name"":""image.JPEG"",""mime_type"":null}",,,2024-08-06 15:53:17.126503
 
 
 
flagged/output/dc780efd57f804eef677/image.JPEG DELETED
Binary file (386 kB)