"""Gradio app that searches conference proceedings for papers by title keyword.

For every selected venue and year the app downloads the venue's proceedings
index page, looks at each paper entry on it, and collects the links of the
papers whose title matches at least one of the user-supplied keywords
(regular expressions are supported).
"""

import datetime
import http.client
import re
from urllib import request
from urllib.parse import urljoin

import gradio as gr
from huggingface_hub import InferenceClient  # noqa: F401  (kept from the original file)
from lxml import etree

# Seconds to wait for a proceedings page before giving up on it.
REQUEST_TIMEOUT_SECONDS = 30

# Base URL that relative paper links of each venue are resolved against.
url_prefix_mapping = {
    'acl': 'https://aclanthology.org',
    'emnlp': 'https://aclanthology.org',
    'naacl': 'https://aclanthology.org',
    'tacl': 'https://aclanthology.org',
    'nips': 'https://papers.nips.cc',
    'icml': 'https://proceedings.mlr.press',
    'iclr': 'https://iclr.cc',
}

# PMLR volume that holds the ICML proceedings of a given year.
mlr_mapping = {
    ('icml', 2015): 'v37',
    ('icml', 2016): 'v48',
    ('icml', 2017): 'v70',
    ('icml', 2018): 'v80',
    ('icml', 2019): 'v97',
    ('icml', 2020): 'v119',
    ('icml', 2021): 'v139',
    ('icml', 2022): 'v162',
    ('icml', 2023): 'v202',
    ('icml', 2024): 'v235',
}

# XPath that selects one element per paper on each venue's index page.
paper_selector_mapping = {
    'acl': ".//a[@class='align-middle']",
    'emnlp': ".//a[@class='align-middle']",
    'naacl': ".//a[@class='align-middle']",
    'tacl': ".//a[@class='align-middle']",
    'iclr': ".//a[@class='Poster']",
    'nips': ".//a[@title='paper title']",
    'icml': ".//div[@class='paper']",
}

# Venues searched for each checkbox label, in search order.
venue_group_mapping = {
    "NeurIPS/ICLR/ICML": ('nips', 'iclr', 'icml'),
    "*ACL": ('acl', 'emnlp', 'naacl', 'tacl'),
    # TODO: no scraper exists for the CVF venues yet, so this group
    # currently contributes nothing instead of re-running the ML venues.
    "CVPR/ECCV/ICCV": (),
}


def get_paper_home(venue, year):
    """Return the proceedings index URL for ``venue`` in ``year``.

    Returns ``None`` when no index page is known for the combination
    (an ICML year missing from ``mlr_mapping``, or a venue such as
    ``'tacl'`` that has no URL scheme here), so callers can skip it.
    """
    if venue in ('acl', 'emnlp', 'naacl'):
        return f'https://aclanthology.org/events/{venue}-{year}'
    if venue == 'nips':
        return f'https://papers.{venue}.cc/paper_files/paper/{year}'
    if venue == 'icml':
        volume = mlr_mapping.get((venue, year))
        if volume is None:
            return None
        return f'https://proceedings.mlr.press/{volume}'
    if venue == 'iclr':
        return f'https://iclr.cc/Downloads/{year}'
    return None


def check_keywords(ele, keywords):
    """Return True if the lower-cased text of ``ele`` matches any keyword.

    ``keywords`` is an iterable of regex strings or compiled patterns;
    each one is tried with ``re.search``.
    """
    text = ''.join(ele.itertext()).lower()
    return any(re.search(pattern, text) for pattern in keywords)


def check_keywords_icml(ele, keywords):
    """Return True if the title inside a PMLR paper entry matches any keyword.

    ``ele`` is the paper's container element; the title is read from its
    ``<p class="title">`` descendant. An entry without a title never matches.
    """
    title = ele.find('.//p[@class="title"]')
    if title is None:
        return False
    return check_keywords(title, keywords)


def _compile_keywords(keywords):
    """Turn the comma-separated keyword string into compiled patterns."""
    patterns = []
    # NOTE: splitting on commas means a regex cannot itself contain a comma.
    for raw in (keywords or '').split(','):
        keyword = raw.strip()
        if not keyword:
            # An empty pattern would match every single paper.
            continue
        try:
            patterns.append(re.compile(keyword, re.IGNORECASE))
        except re.error:
            # Not a valid regex: fall back to matching it literally.
            patterns.append(re.compile(re.escape(keyword), re.IGNORECASE))
    return patterns


def _select_venues(venues):
    """Expand the checked venue groups into an ordered, duplicate-free list."""
    chosen = venues or []
    selected = []
    for label, members in venue_group_mapping.items():
        if label not in chosen:
            continue
        for venue in members:
            if venue not in selected:
                selected.append(venue)
    return selected


def _fetch_tree(url):
    """Download ``url`` and parse it as HTML; return None on any failure."""
    try:
        with request.urlopen(url, timeout=REQUEST_TIMEOUT_SECONDS) as response:
            html = response.read()
    except (OSError, ValueError, http.client.HTTPException) as err:
        # Best effort: a missing or unreachable page only skips this venue/year.
        print(f'skipping {url}: {err}')
        return None
    try:
        # Bytes are passed on purpose so lxml detects the page encoding itself.
        return etree.fromstring(html, etree.HTMLParser())
    except (etree.LxmlError, ValueError) as err:
        print(f'could not parse {url}: {err}')
        return None


def _match_paper_url(element, venue, base_url, patterns):
    """Return the absolute paper URL if ``element`` matches, else None."""
    if venue == 'icml':
        if not check_keywords_icml(element, patterns):
            return None
        links = element.find('.//p[@class="links"]')
        anchor = links.find('a') if links is not None else None
        href = anchor.get('href') if anchor is not None else None
    else:
        if not check_keywords(element, patterns):
            return None
        href = element.get('href')
    if not href:
        return None
    # urljoin leaves absolute links alone and resolves relative ones.
    return urljoin(base_url, href)


def search(keywords, venues, min_year, max_year):
    """Search the selected venues for papers whose title matches a keyword.

    Args:
        keywords: Comma-separated keywords; each may be a regular expression
            and is matched case-insensitively. Blank entries are ignored.
        venues: Checked venue-group labels (keys of ``venue_group_mapping``).
        min_year: First year to search.
        max_year: Last year to search (inclusive).

    Returns:
        A list of one-element rows ``[paper_url]``, in venue/year order.
        Empty when no usable keyword was given.
    """
    patterns = _compile_keywords(keywords)
    if not patterns:
        return []

    # Sliders may deliver floats; range() needs ints.
    first_year, last_year = int(min_year), int(max_year)

    results = []
    for venue in _select_venues(venues):
        selector = paper_selector_mapping[venue]
        for year in range(first_year, last_year + 1):
            print(venue, year)
            paper_home = get_paper_home(venue, year)
            if paper_home is None:
                continue
            tree = _fetch_tree(paper_home)
            if tree is None:
                continue

            # PMLR links are resolved against the volume page itself.
            if venue == 'icml':
                base_url = paper_home + '/'
            else:
                base_url = url_prefix_mapping[venue]

            elements = tree.findall(selector)
            for element in elements:
                paper_url = _match_paper_url(element, venue, base_url, patterns)
                if paper_url is not None:
                    results.append([paper_url])
            print(len(elements))
            print()
    return results


# For information on how to customize the interface, peruse the gradio docs:
# https://www.gradio.app/docs/chatinterface

current_year = datetime.datetime.now().year

# Earlier gr.Interface-based layout, kept for reference:
# demo = gr.Interface(
#     search,
#     inputs=[
#         gr.Textbox(lines=2, placeholder="Keywords of the paper title. Supports ReGex."),
#         gr.CheckboxGroup(["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"], label="Choose Venues to Search", value=["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"]),
#         gr.Slider(minimum=2020, maximum=current_year, value=[2020, current_year], label="Year Range", step=1)
#     ],
#     outputs=gr.DataFrame(headers=["Paper Link", "Title", "Authors"])
# )


def test_search(keywords, venues, min_year, max_year):
    """Offline stand-in for ``search`` that returns fixed rows for UI testing."""
    return [["https://example.com"], ["https://anotherexample.com"]]


with gr.Blocks() as demo:
    with gr.Row():  # Organize inputs and outputs in a row (side by side)
        with gr.Column(scale=1):  # Input section (narrower)
            # Textbox for keywords
            textbox = gr.Textbox(
                label="Enter comma-separated keywords",
                placeholder="Enter keywords, separated by commas...",
                lines=2
            )

            # Vertical checkbox group for venues
            checkbox = gr.CheckboxGroup(
                ["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"],
                label="Choose Venues to Search",
                value=["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"],
                type="value"
            )

            # Year range sliders
            min_year_slider = gr.Slider(minimum=2015, maximum=current_year, value=2020, label="Select Min Year", step=1)
            max_year_slider = gr.Slider(minimum=2015, maximum=current_year, value=current_year, label="Select Max Year", step=1)

            submit_button = gr.Button("Search")

        with gr.Column(scale=3):  # Output section (wider)
            # Output table
            output_table = gr.DataFrame(
                headers=["Paper Link",],  # "Title", "Authors"
                label="Results"
            )

    # Link the input components to the output function
    submit_button.click(
        search,
        inputs=[textbox, checkbox, min_year_slider, max_year_slider],
        outputs=output_table
    )

if __name__ == "__main__":
    demo.launch()