jialicheng commited on
Commit
c8c0cec
1 Parent(s): d0e36a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -4
app.py CHANGED
@@ -1,16 +1,124 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- def search(a):
5
- return a
6
 
7
  """
8
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
9
  """
10
  demo = gr.Interface(
11
  search,
12
- inputs=gr.Textbox(lines=2, placeholder="Keywords of the paper title. Supports ReGex."),
13
- outputs="text"
 
 
 
14
  )
15
 
16
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ from urllib import request
4
+ from lxml import etree
5
+
6
# Base URL of the site hosting each venue's papers. It is prepended to the
# site-relative paper links scraped from that venue's listing page.
url_prefix_mapping = {
    'acl': 'https://aclanthology.org',
    'emnlp': 'https://aclanthology.org',
    'naacl': 'https://aclanthology.org',
    'tacl': 'https://aclanthology.org',
    'nips': 'https://papers.nips.cc',
    # ICML listing pages are fetched from proceedings.mlr.press (see
    # get_paper_home), not from the NeurIPS site.
    'icml': 'https://proceedings.mlr.press',
    'iclr': 'https://iclr.cc',
}
15
+
16
# PMLR (proceedings.mlr.press) volume identifier for each (venue, year) pair.
# Every conference edition is published as its own volume, so no two entries
# may share a value.
mlr_mapping = {
    ('icml', 2020): 'v119',
    ('icml', 2021): 'v139',
    ('icml', 2022): 'v162',
    ('icml', 2023): 'v202',
    ('icml', 2024): 'v235',  # was 'v139', a copy of the 2021 volume
}
23
+
24
+ def get_paper_home(venue, year):
25
+ if venue in ['acl', 'emnlp', 'naacl']:
26
+ return f'https://aclanthology.org/events/{venue}-{year}'
27
+
28
+ elif venue == 'nips':
29
+ return f'https://papers.{venue}.cc/paper_files/paper/{year}'
30
+
31
+ elif venue == 'icml':
32
+ return f'https://proceedings.mlr.press/{mlr_mapping[(venue, year)]}'
33
+
34
+ elif venue == 'iclr':
35
+ return f'https://iclr.cc/Downloads/{year}'
36
+
37
+
38
# Years scanned when the caller does not pass ``years``.
# NOTE(review): chosen to match the years covered by ``mlr_mapping`` - confirm.
_DEFAULT_YEARS = (2020, 2021, 2022, 2023, 2024)

# UI venue-group label -> venue keys, in the order the venues are searched.
_VENUE_GROUPS = {
    "NeurIPS/ICLR/ICML": ('nips', 'iclr', 'icml'),
    "*ACL": ('acl', 'emnlp', 'naacl', 'tacl'),
    # No scraper exists for the computer-vision venues yet. This group used to
    # re-add the ML venues, which searched them twice and duplicated results.
    "CVPR/ECCV/ICCV": (),
}

_ACL_PAPER_XPATH = ".//a[@class='align-middle']"

# XPath selecting one element per paper on each venue's listing page.
# 'emnlp' is listed explicitly: a substring test for 'acl' does not match it.
_PAPER_XPATH = {
    'acl': _ACL_PAPER_XPATH,
    'emnlp': _ACL_PAPER_XPATH,
    'naacl': _ACL_PAPER_XPATH,
    'tacl': _ACL_PAPER_XPATH,
    'iclr': ".//a[@class='Poster']",
    'nips': ".//a[@title='paper title']",
    'icml': ".//div[@class='paper']",
}


def _compile_keywords(keywords):
    """Turn ``keywords`` into a list of compiled, case-insensitive regexes.

    ``keywords`` may be ``None`` (no patterns), a single string holding one
    pattern per line (what the Textbox delivers), or an iterable of pattern
    strings and/or already compiled patterns.

    Raises:
        re.error: if a pattern is not a valid regular expression.
    """
    import re

    if keywords is None:
        return []
    if isinstance(keywords, str):
        # Iterating a str would yield single characters, so split into lines
        # and drop the blank ones instead.
        stripped = (line.strip() for line in keywords.splitlines())
        keywords = [line for line in stripped if line]

    compiled = []
    for keyword in keywords:
        if hasattr(keyword, 'search'):
            # Already a compiled pattern: reuse it as is.
            compiled.append(keyword)
        else:
            compiled.append(re.compile(keyword, re.IGNORECASE))
    return compiled


def _title_matches(title, keywords):
    """Return True when at least one keyword pattern is found in ``title``."""
    if keywords is None:
        # Backward compatibility: older callers relied on a module-level
        # ``keywords`` variable instead of passing the patterns in.
        keywords = globals().get('keywords')
    return any(pattern.search(title) for pattern in _compile_keywords(keywords))


def _icml_paper_href(ele):
    """Return the first link inside the paper's ``<p class="links">``, or None."""
    links = ele.find('.//p[@class="links"]')
    if links is None:
        return None
    anchor = links.find('a')
    return None if anchor is None else anchor.get('href')


def check_key_words(ele, keywords=None):
    """Tell whether the text of a paper element matches any keyword.

    Args:
        ele: Element exposing ``itertext()``; its whole text is the title.
        keywords: Patterns in any form accepted by ``_compile_keywords``.
            When omitted, a module-level ``keywords`` variable is used if one
            exists.

    Returns:
        True if any pattern matches the lower-cased text, else False.
    """
    title = ''.join(ele.itertext()).lower()
    return _title_matches(title, keywords)


def check_key_words_icml(ele, keywords=None):
    """Tell whether an ICML paper ``<div>`` has a title matching any keyword.

    Args:
        ele: Paper element; the title is read from its ``<p class="title">``.
        keywords: Patterns in any form accepted by ``_compile_keywords``.
            When omitted, a module-level ``keywords`` variable is used if one
            exists.

    Returns:
        True if any pattern matches the lower-cased title; False otherwise,
        including when the element has no title paragraph.
    """
    title_node = ele.find('.//p[@class="title"]')
    if title_node is None:
        return False
    title = ''.join(title_node.itertext()).lower()
    return _title_matches(title, keywords)


def search(keywords, venues, years=None):
    """Scrape the selected venues' listing pages for papers matching keywords.

    Args:
        keywords: Regex patterns for the paper title: either one string with a
            pattern per line (as delivered by the Textbox) or an iterable of
            patterns. A paper matches when any pattern matches its title.
        venues: Iterable of selected venue-group labels (keys of
            ``_VENUE_GROUPS``); unknown labels are ignored.
        years: Iterable of years to scan. Defaults to ``_DEFAULT_YEARS``.

    Returns:
        A flat list of paper URLs (strings), in venue then year order.
        NOTE(review): the UI feeds this to ``gr.DataFrame``; confirm that a
        flat list of strings renders as one row per link.

    Raises:
        re.error: if a keyword is not a valid regular expression.
    """
    from urllib.parse import urljoin

    patterns = _compile_keywords(keywords)
    if not patterns:
        # Nothing can match, so skip the network round-trips entirely.
        return []
    if years is None:
        years = _DEFAULT_YEARS

    selected = set(venues or ())
    search_venues = []
    for group, group_venues in _VENUE_GROUPS.items():
        if group not in selected:
            continue
        for venue in group_venues:
            if venue not in search_venues:
                search_venues.append(venue)

    results = []
    for venue in search_venues:
        paper_tag_on_html = _PAPER_XPATH[venue]

        for year in years:
            print(venue, year)

            try:
                paper_home = get_paper_home(venue, year)
            except KeyError:
                # No proceedings volume is known for this venue/year.
                paper_home = None
            if paper_home is None:
                continue

            # Best-effort scraping: a page that cannot be fetched or decoded
            # is reported and skipped, so one bad page does not abort the run.
            try:
                with request.urlopen(paper_home) as response:
                    html = response.read().decode()
            except Exception as err:
                print(f'skipping {paper_home}: {err}')
                continue

            tree = etree.fromstring(html, etree.HTMLParser())
            if tree is None:
                continue

            elements = tree.findall(paper_tag_on_html)
            if venue == 'icml':
                elements = [e for e in elements if check_key_words_icml(e, patterns)]
                hrefs = [_icml_paper_href(e) for e in elements]
                base_url = paper_home + '/'
            else:
                # The matched element is the paper's <a> itself, so the link
                # is its own href attribute.
                elements = [e for e in elements if check_key_words(e, patterns)]
                hrefs = [e.get('href') for e in elements]
                base_url = url_prefix_mapping[venue]

            # urljoin leaves absolute links untouched and resolves relative ones.
            results.extend(urljoin(base_url, href) for href in hrefs if href)

            print(len(elements))
            print()

    return results
110
 
 
 
111
 
112
  """
113
  For information on how to customize the Interface, peruse the gradio docs: https://www.gradio.app/docs/interface
114
  """
115
  # Gradio UI: a keyword Textbox and a venue-group CheckboxGroup (all groups
  # pre-selected) are passed to search(); its result is shown in a DataFrame
  # with a single "Paper Link" column.
  demo = gr.Interface(
116
  search,
117
+ inputs=[
118
+ gr.Textbox(lines=2, placeholder="Keywords of the paper title. Supports ReGex."),
119
+ gr.CheckboxGroup(["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"], label="Choose Venues to Search", value=["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"])
120
+ ],
121
+ outputs=gr.DataFrame(headers=["Paper Link", ])#"Title", "Authors"
122
  )
123
 
124