Spaces:
Sleeping
Sleeping
jialicheng
committed on
Commit
•
c8c0cec
1
Parent(s):
d0e36a3
Update app.py
Browse files
app.py
CHANGED
@@ -1,16 +1,124 @@
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
-
def search(a):
|
5 |
-
return a
|
6 |
|
7 |
"""
|
8 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
9 |
"""
|
10 |
demo = gr.Interface(
|
11 |
search,
|
12 |
-
inputs=
|
13 |
-
|
|
|
|
|
|
|
14 |
)
|
15 |
|
16 |
|
|
|
1 |
import re
from urllib import request
from urllib.parse import urljoin

import gradio as gr
from huggingface_hub import InferenceClient
from lxml import etree
|
5 |
+
|
6 |
+
# Site root for each supported venue key; `search` looks a venue up here to
# obtain the prefix for paper links scraped from that venue's index page.
url_prefix_mapping = {
    'acl': 'https://aclanthology.org',
    'emnlp': 'https://aclanthology.org',
    'naacl': 'https://aclanthology.org',
    'tacl': 'https://aclanthology.org',
    'nips': 'https://papers.nips.cc',
    # ICML proceedings are served from PMLR (see get_paper_home), not from
    # the NeurIPS site.
    'icml': 'https://proceedings.mlr.press',
    'iclr': 'https://iclr.cc',
}
|
15 |
+
|
16 |
+
# (venue, year) -> PMLR volume identifier, used by get_paper_home to build the
# proceedings URL. Each conference edition has its own volume, so values must
# be unique.
mlr_mapping = {
    ('icml', 2020): 'v119',
    ('icml', 2021): 'v139',
    ('icml', 2022): 'v162',
    ('icml', 2023): 'v202',
    ('icml', 2024): 'v235',  # was 'v139', a copy of the 2021 entry
}
|
23 |
+
|
24 |
+
def get_paper_home(venue, year):
    """Return the URL of the proceedings index page for a venue and year.

    Args:
        venue: Lower-case venue key ('acl', 'emnlp', 'naacl', 'nips', 'icml'
            or 'iclr').
        year: Conference year; interpolated into the URL, and for ICML used
            together with ``venue`` as the key into ``mlr_mapping``.

    Returns:
        The index-page URL as a string, or ``None`` when the venue is not
        handled here or (for ICML) the year has no ``mlr_mapping`` entry.
    """
    if venue in ('acl', 'emnlp', 'naacl'):
        return f'https://aclanthology.org/events/{venue}-{year}'

    if venue == 'nips':
        return f'https://papers.{venue}.cc/paper_files/paper/{year}'

    if venue == 'icml':
        # Look the volume up defensively: an unmapped year should mean
        # "no page available", not a KeyError that aborts the caller.
        volume = mlr_mapping.get((venue, year))
        if volume is None:
            return None
        return f'https://proceedings.mlr.press/{volume}'

    if venue == 'iclr':
        return f'https://iclr.cc/Downloads/{year}'

    # Unhandled venue (e.g. 'tacl'): make the "no URL" result explicit.
    return None
|
36 |
+
|
37 |
+
|
38 |
+
# Conference years scanned when the caller does not pass ``years``; this is
# the span covered by the ICML entries in ``mlr_mapping``.
_DEFAULT_YEARS = tuple(range(2020, 2025))

# Seconds to wait for a proceedings page before skipping it.
_FETCH_TIMEOUT = 30

# UI checkbox label -> venue keys scraped for that label, in search order.
_VENUE_GROUPS = {
    "NeurIPS/ICLR/ICML": ('nips', 'iclr', 'icml'),
    "*ACL": ('acl', 'emnlp', 'naacl', 'tacl'),
    # No scraper exists for the vision venues yet, so selecting this group
    # adds nothing (it used to re-add the NeurIPS/ICLR/ICML venues).
    "CVPR/ECCV/ICCV": (),
}

# Venue key -> ElementPath expression selecting one element per paper on the
# venue's index page.
_PAPER_SELECTORS = {
    'acl': ".//a[@class='align-middle']",
    'emnlp': ".//a[@class='align-middle']",
    'naacl': ".//a[@class='align-middle']",
    'tacl': ".//a[@class='align-middle']",
    'iclr': ".//a[@class='Poster']",
    'nips': ".//a[@title='paper title']",
    'icml': ".//div[@class='paper']",
}


def _compile_keywords(keywords):
    """Turn the raw keyword input into a list of compiled regex patterns.

    A string is split into lines and every non-blank line becomes one
    case-insensitive pattern; any other iterable is treated as a sequence of
    pattern strings. Raises ``ValueError`` for an invalid regular expression.
    """
    if keywords is None:
        return []
    if isinstance(keywords, str):
        keywords = keywords.splitlines()

    compiled = []
    for raw in keywords:
        raw = raw.strip()
        if not raw:
            continue
        try:
            compiled.append(re.compile(raw, re.IGNORECASE))
        except re.error as err:
            raise ValueError(f'Invalid keyword pattern {raw!r}: {err}') from err
    return compiled


def check_key_words(ele, patterns=()):
    """Return True if the text of ``ele`` matches any of ``patterns``.

    Args:
        ele: Element whose full text content (including descendants) is the
            paper title.
        patterns: Regex patterns (strings or compiled). They are searched in
            the lower-cased title, so plain-string patterns should be lower
            case or compiled with ``re.IGNORECASE``.

    Returns:
        ``True`` on the first match, ``False`` if nothing matches or no
        patterns are given.
    """
    title = ''.join(ele.itertext()).lower()
    return any(re.search(pattern, title) for pattern in patterns)


def check_key_words_icml(ele, patterns=()):
    """Return True if the title inside an ICML paper element matches.

    ``ele`` is expected to contain a ``<p class="title">`` descendant holding
    the title; an element without one never matches.
    """
    title_node = ele.find('.//p[@class="title"]')
    if title_node is None:
        return False
    return check_key_words(title_node, patterns)


def _paper_links(tree, venue, patterns, base_url):
    """Collect absolute URLs of the papers in ``tree`` whose title matches."""
    links = []
    for node in tree.findall(_PAPER_SELECTORS[venue]):
        if venue == 'icml':
            if not check_key_words_icml(node, patterns):
                continue
            # The paper link is the first <a> inside <p class="links">.
            links_par = node.find('.//p[@class="links"]')
            anchor = links_par.find('a') if links_par is not None else None
            href = anchor.get('href') if anchor is not None else None
        else:
            if not check_key_words(node, patterns):
                continue
            # The selected element is itself the <a> pointing at the paper.
            href = node.get('href')
        if href:
            links.append(urljoin(base_url, href))
    return links


def search(keywords, venues, years=None):
    """Find papers whose title matches ``keywords`` in the selected venues.

    Args:
        keywords: Regex pattern(s) for the paper title. A string is read as
            one pattern per line; matching is case-insensitive.
        venues: Selected venue-group labels (the keys of ``_VENUE_GROUPS``).
        years: Conference years to scan; defaults to ``_DEFAULT_YEARS``.

    Returns:
        A list of paper URLs, ordered by venue and then year. Pages that
        cannot be located, fetched or parsed are skipped.

    Raises:
        ValueError: If a keyword is not a valid regular expression.
    """
    patterns = _compile_keywords(keywords)

    selected = venues or ()
    search_venues = []
    for label, members in _VENUE_GROUPS.items():
        if label in selected:
            search_venues.extend(v for v in members if v not in search_venues)

    # Nothing to match or nowhere to look: avoid any network traffic.
    if not patterns or not search_venues:
        return []

    if years is None:
        years = _DEFAULT_YEARS

    results = []
    for venue in search_venues:
        for year in years:
            print(venue, year)

            try:
                paper_home = get_paper_home(venue, year)
            except KeyError:
                # No proceedings volume known for this venue/year.
                paper_home = None
            if paper_home is None:
                continue

            try:
                with request.urlopen(paper_home, timeout=_FETCH_TIMEOUT) as response:
                    html = response.read()
            except (OSError, ValueError):
                # URLError/HTTPError/timeouts are OSError subclasses; a page
                # that does not exist for this year is simply skipped.
                continue

            try:
                # Bytes are passed through so lxml can detect the encoding.
                tree = etree.fromstring(html, etree.HTMLParser())
            except etree.LxmlError:
                continue
            if tree is None:
                continue

            # ICML links are already absolute; other venues use site-relative
            # hrefs that need the venue's site root.
            if venue == 'icml':
                base_url = paper_home + '/'
            else:
                base_url = url_prefix_mapping[venue]

            urls = _paper_links(tree, venue, patterns, base_url)
            results.extend(urls)

            print(len(urls))
            print()

    return results
|
110 |
|
|
|
|
|
111 |
|
112 |
"""
|
113 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
114 |
"""
|
115 |
# Gradio front end: a keyword box and a venue selector are passed to `search`,
# and its result is rendered in a single-column table of paper links.
demo = gr.Interface(
    fn=search,
    inputs=[
        gr.Textbox(
            lines=2,
            placeholder="Keywords of the paper title. Supports ReGex.",
        ),
        gr.CheckboxGroup(
            ["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"],
            label="Choose Venues to Search",
            # Every venue group is ticked by default.
            value=["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"],
        ),
    ],
    # Only the link column is shown; "Title" and "Authors" columns are not
    # produced yet.
    outputs=gr.DataFrame(headers=["Paper Link"]),
)
|
123 |
|
124 |
|