Spaces:

biodatlab
/

NBDT-Recommendation-Engine

Runtime error

File size: 4,763 Bytes

1a5ec75

import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

import torch


def create_miread_embed(sents, bundle):
    """Embed ``sents`` on the CPU using the (tokenizer, model) pair in ``bundle``.

    Args:
        sents: Input text(s), handed to the tokenizer unchanged.
        bundle: Indexable pair; ``bundle[0]`` is the tokenizer callable and
            ``bundle[1]`` is a model exposing ``.cpu()`` and ``.bert(...)``.

    Returns:
        A CPU tensor holding, for every input, the hidden state at sequence
        position 0 of ``model.bert(...).last_hidden_state``
        (presumably the [CLS] token for BERT-style tokenizers; confirm).
    """
    tokenizer, model = bundle[0], bundle[1]
    cpu = torch.device('cpu')

    # Model and inputs are both pinned to the CPU before the forward pass.
    model.cpu()
    encoded = tokenizer(
        sents,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt",
    ).to(cpu)

    # Inference only: no gradients are tracked.
    with torch.no_grad():
        hidden_states = model.bert(**encoded).last_hidden_state
        first_position = hidden_states[:, 0, :]
    return first_position.cpu()


def get_matches(query, db, k):
    """Return the ``k`` closest entries to ``query`` in ``db`` with their scores.

    Thin pass-through to ``db.similarity_search_with_score``; whatever that
    method returns is handed back unchanged.
    """
    return db.similarity_search_with_score(query, k=k)


def inference(query, db, k=30):
    """Build the abstract, journal and author recommendation tables for a query.

    Retrieves the ``k`` nearest documents to ``query`` from ``db`` through
    ``get_matches`` and derives three tables from their metadata:

    * abstracts: rank, title, first author, submitter, journal, date, link,
      score
    * journals: rank, journal name, summed score of that journal's matches,
      ordered from highest to lowest total
    * authors: rank, score, first author, title, link, date

    Args:
        query: Text passed to the similarity search.
        db: Vector store exposing ``similarity_search_with_score``.
        k: Number of matches to retrieve.

    Returns:
        ``[abstracts_update, journals_update, authors_update]``: Gradio update
        payloads that set each table's value and make it visible.

    Raises:
        KeyError: If a matched document lacks an expected metadata key.
        ValueError, SyntaxError: If the ``authors`` metadata is not a valid
            Python literal.
    """
    # Local import keeps this function self-contained.
    import ast

    matches = get_matches(query, db, k)
    journal_totals = {}
    author_rows = []
    abstract_rows = []
    for rank, (doc, raw_score) in enumerate(matches, start=1):
        meta = doc.metadata
        # Plain Python float so rounding and table serialisation do not depend
        # on the numeric type the vector store returns.
        score = float(raw_score)
        shown_score = round(score, 3)

        title = meta['title']
        # ``authors`` is stored as the string form of a list. literal_eval
        # parses it without executing arbitrary code the way eval() would.
        authors = ast.literal_eval(meta['authors'])
        author = authors[0] if authors else ''
        date = meta['date']
        link = meta['link']
        submitter = meta['submitter']
        journal = meta['journal']

        # Journals: accumulate the score of every match from the same journal.
        journal_totals[journal] = journal_totals.get(journal, 0.0) + score

        # Authors table row.
        author_rows.append([rank, shown_score, author, title, link, date])

        # Abstracts table row.
        abstract_rows.append(
            [rank, title, author, submitter, journal, date, link, shown_score]
        )

    # Rank journals on the exact totals and round only for display.
    # NOTE(review): whether a larger score means "more similar" depends on the
    # index's distance metric; confirm the descending order is intended.
    ranked_journals = sorted(
        journal_totals.items(), key=lambda item: item[1], reverse=True
    )
    journal_rows = [
        [rank, name, round(total, 3)]
        for rank, (name, total) in enumerate(ranked_journals, start=1)
    ]

    # gr.update is the generic updater; the per-component
    # ``gr.Dataframe.update`` classmethod is not available in Gradio 4+.
    j_output = gr.update(value=journal_rows, visible=True)
    n_output = gr.update(value=author_rows, visible=True)
    a_output = gr.update(value=abstract_rows, visible=True)
    return [a_output, j_output, n_output]


# Embedding model used to encode queries for the FAISS index lookup.
model_name = "biodatlab/MIReAD-Neuro"
# Use the GPU only when one is actually present; a hard-coded 'cuda' makes
# model loading fail on CPU-only hosts.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
faiss_embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# Load the prebuilt index from the local "faiss_index" directory.
# NOTE(review): recent langchain releases require
# ``allow_dangerous_deserialization=True`` here; confirm against the
# installed version before adding it.
vecdb = FAISS.load_local("faiss_index", faiss_embedder)


def _recommend(query, k):
    """Gradio callback: run ``inference`` for ``query`` against the loaded index.

    The click handler supplies only the abstract text and the slider value, so
    the vector store is bound here. Wiring ``inference`` in directly would pass
    the slider value as its ``db`` argument.
    """
    # The similarity search takes a count, so coerce the slider value to int.
    return inference(query, vecdb, int(k))


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown("NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journalsrecommendation built for NBDT journal editors. \
    It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract.\
    To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\".\
    Then, you can hover to authors/abstracts/journals tab to find a suggested list.\
    The data in our current demo is selected from 2018 to 2022. We will update the data monthly for an up-to-date publications.")

    abst = gr.Textbox(label="Abstract", lines=10)

    k = gr.Slider(1, 100, step=1, value=50,
                  label="Number of matches to consider")

    action_btn = gr.Button(value="Find Matches")

    # Result tables start hidden; the callback's updates make them visible.
    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            # One datatype per column: No. and Score are numeric.
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Submitter',
                     'Journal', 'Date', 'Link', 'Score'],
            # One datatype per column: No. and Score are numeric.
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            visible=False
        )
    # The output order must match the list returned by ``inference``:
    # abstracts, journals, authors.
    action_btn.click(fn=_recommend,
                     inputs=[
                         abst,
                         k,
                     ],
                     outputs=[a_output, j_output, n_output],
                     api_name="neurojane")

demo.launch(debug=True)