import ast

import gradio as gr
import torch
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS


def create_miread_embed(sents, bundle):
    """Embed sentences with a (tokenizer, model) bundle on the CPU.

    Args:
        sents: Text (or list of texts) passed straight to the tokenizer.
        bundle: Indexable pair ``(tokenizer, model)``; the model must expose
            a ``.bert`` sub-module.

    Returns:
        A CPU tensor holding, for each input, the last hidden state at
        sequence position 0 (presumably the [CLS] token -- confirm against
        the tokenizer in use).
    """
    tokenizer = bundle[0]
    model = bundle[1]
    # Inference is pinned to the CPU: both the model and the inputs are moved.
    model.cpu()
    tokens = tokenizer(
        sents,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    device = torch.device('cpu')
    tokens = tokens.to(device)
    with torch.no_grad():
        out = model.bert(**tokens)
        feature = out.last_hidden_state[:, 0, :]
    return feature.cpu()


def get_matches(query, db, k):
    """Return the ``k`` nearest ``(document, score)`` pairs for ``query``."""
    matches = db.similarity_search_with_score(query, k=k)
    return matches


def _first_author(raw_authors):
    """Return the first author from a metadata ``authors`` field.

    The field is stored as the string form of a Python list. It is parsed
    with ``ast.literal_eval`` (literals only) instead of ``eval`` so that
    index metadata can never execute code. Empty lists yield ``""`` and
    values that are not valid literals are returned as plain text rather
    than crashing the whole request.
    """
    if isinstance(raw_authors, str):
        try:
            authors = ast.literal_eval(raw_authors)
        except (ValueError, SyntaxError):
            return raw_authors
    else:
        authors = raw_authors
    if isinstance(authors, (list, tuple)):
        return authors[0] if authors else ""
    return str(authors)


def inference(query, db, k=30):
    """Search ``db`` for ``query`` and build the three result tables.

    Args:
        query: Abstract (optionally ``title[SEP]abstract``) to match.
        db: Vector store exposing ``similarity_search_with_score``.
        k: Number of matches to retrieve.

    Returns:
        ``[a_output, j_output, n_output]`` -- Dataframe updates for the
        Abstracts, Journals and Authors tables, in that order.
    """
    matches = get_matches(query, db, k)
    j_bucket = {}
    n_table = []
    a_table = []
    for rank, (doc, score) in enumerate(matches, start=1):
        meta = doc.metadata
        title = meta['title']
        author = _first_author(meta['authors'])
        date = meta['date']
        link = meta['link']
        submitter = meta['submitter']
        journal = meta['journal']
        rounded = round(score, 3)

        # For journals: accumulate the scores of every match per journal.
        if journal not in j_bucket:
            j_bucket[journal] = score
        else:
            j_bucket[journal] += score

        # For authors
        n_table.append([rank, rounded, author, title, link, date])

        # For abstracts
        a_table.append(
            [rank, title, author, submitter, journal, date, link, rounded]
        )

    # Journals are ordered by their rounded summed score, largest first.
    # NOTE(review): whether a larger summed score means "better" depends on
    # the metric the index returns -- confirm.
    ranked_journals = sorted(
        ([journal, round(total, 3)] for journal, total in j_bucket.items()),
        key=lambda row: row[1],
        reverse=True,
    )
    j_table = [
        [rank, journal, total]
        for rank, (journal, total) in enumerate(ranked_journals, start=1)
    ]

    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)
    return [a_output, j_output, n_output]


model_name = "biodatlab/MIReAD-Neuro"
# Use the GPU when one is present, but keep the demo runnable on CPU-only
# hosts instead of failing at start-up.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

vecdb = FAISS.load_local("faiss_index", faiss_embedder)


def find_matches(query, k):
    """Gradio callback: run ``inference`` against the loaded index.

    The click event passes its inputs positionally (textbox, slider), so
    the vector store has to be bound here; wiring ``inference`` directly
    would hand the slider value to its ``db`` parameter.
    """
    return inference(query, vecdb, int(k))


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown(
        "NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journalsrecommendation built for NBDT journal editors. "
        "It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract. "
        "To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\". "
        "Then, you can hover to authors/abstracts/journals tab to find a suggested list. "
        "The data in our current demo is selected from 2018 to 2022. We will update the data monthly for an up-to-date publications."
    )

    abst = gr.Textbox(label="Abstract", lines=10)

    k = gr.Slider(1, 100, step=1, value=50,
                  label="Number of matches to consider")

    action_btn = gr.Button(value="Find Matches")

    with gr.Tab("Authors"):
        # One datatype per header: No. and Score are numeric, the rest text.
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        # Eight columns, so eight datatypes; only No. and Score are numeric.
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Submitter',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            visible=False
        )

    action_btn.click(
        fn=find_matches,
        inputs=[abst, k],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane",
    )

demo.launch(debug=True)