|
from climateqa.engine.keywords import make_keywords_chain |
|
from climateqa.engine.llm import get_llm |
|
from climateqa.knowledge.openalex import OpenAlex |
|
from climateqa.engine.chains.answer_rag import make_rag_papers_chain |
|
from front.utils import make_html_df |
|
from climateqa.engine.reranker import get_reranker |
|
|
|
# Shared handles created once at import time and reused by the functions below.
oa = OpenAlex()

llm = get_llm(
    provider="openai",
    max_tokens=1024,
    temperature=0.0,
)

reranker = get_reranker("nano")
|
|
|
|
|
# (column name, width) pairs for the papers table, in declaration order.
# NOTE(review): the width unit is not visible here (presumably pixels) — confirm
# against whatever consumes ``papers_cols_widths``.
_PAPER_COLUMN_LAYOUT = (
    ("id", 100),
    ("title", 300),
    ("doi", 100),
    ("publication_year", 100),
    ("abstract", 500),
    ("is_oa", 50),
)

# Parallel lists: papers_cols[i] pairs with papers_cols_widths[i].
papers_cols = [column for column, _ in _PAPER_COLUMN_LAYOUT]
papers_cols_widths = [width for _, width in _PAPER_COLUMN_LAYOUT]
|
|
|
|
|
|
|
def generate_keywords(query):
    """Build a keyword search string for ``query``.

    Runs the keywords chain (built from the module-level ``llm``) on ``query``
    and joins the entries found under the ``"keywords"`` key of its result
    with ``" AND "``.

    Args:
        query: Input handed unchanged to the keywords chain.

    Returns:
        The extracted keywords as a single ``" AND "``-separated string.
    """
    extraction = make_keywords_chain(llm).invoke(query)
    return " AND ".join(extraction["keywords"])
|
|
|
|
|
# Maximum number of papers rendered as cards and passed to the summary chain.
_TOP_PAPERS = 10


def _embed_network_in_iframe(network_html, height):
    """Wrap a standalone HTML document in a sandboxed ``<iframe srcdoc=...>``.

    Args:
        network_html: Full HTML document to embed.
        height: CSS height used for the iframe (e.g. ``"750px"``).

    Returns:
        The ``<iframe>`` markup as a string.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    css_to_inject = "<style>#mynetwork { border: none !important; } .card { border: none !important; }</style>"
    document = network_html + css_to_inject

    # Escape the document for use as an attribute value instead of swapping
    # every ' for ": the swap also rewrote apostrophes inside the document's
    # own text and inline scripts, which can corrupt them. The browser decodes
    # the character references back into the original document.
    srcdoc = escape(document, quote=True)

    # Line breaks inside the tag mirror the original markup.
    return (
        f'<iframe style="width: 100%; height: {height};margin:0 auto" name="result" '
        'allow="midi; geolocation; microphone; camera;\n'
        'display-capture; encrypted-media;" sandbox="allow-modals allow-forms\n'
        'allow-scripts allow-same-origin allow-popups\n'
        'allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""\n'
        'allowpaymentrequest="" frameborder="0" '
        f"srcdoc='{srcdoc}'></iframe>"
    )


async def find_papers(query, after, relevant_content_sources, reranker=reranker):
    """Search OpenAlex for papers on ``query`` and stream a summary of them.

    When ``"OpenAlex"`` is not in ``relevant_content_sources``, or when no
    paper with a non-empty abstract is found, a single ``("", "", "")`` is
    yielded. Otherwise the first yield carries the paper cards and the
    citation-network iframe with an empty summary, and each later yield
    carries the same two strings plus the summary accumulated so far.

    Args:
        query: User question; used for keyword generation, reranking and as
            the question given to the summary chain.
        after: Forwarded to ``oa.search`` as ``after``.
            NOTE(review): presumably a lower bound on publication year — confirm.
        relevant_content_sources: Container of enabled source names.
        reranker: Reranker handed to ``oa.rerank``; defaults to the
            module-level ``reranker``.

    Yields:
        Tuples ``(docs_html, network_html, summary)`` of strings.
    """
    if "OpenAlex" not in relevant_content_sources:
        yield "", "", ""
        return

    summary = ""
    keywords = generate_keywords(query)
    df_works = oa.search(keywords, after=after)

    print(f"Found {len(df_works)} papers")

    if not df_works.empty:
        # Papers without an abstract cannot be reranked or summarised.
        df_works = df_works.dropna(subset=["abstract"])
        df_works = df_works[df_works["abstract"] != ""].reset_index(drop=True)

    # Checked after filtering too: every hit may have lacked an abstract.
    if df_works.empty:
        print("No papers found")
        # Emit an empty result so consumers get an update on this path as well,
        # matching what is yielded when the OpenAlex source is disabled.
        yield "", "", ""
        return

    df_works = oa.rerank(query, df_works, reranker)
    df_works = df_works.sort_values("rerank_score", ascending=False)

    # Bounded by the row count so fewer than _TOP_PAPERS results do not index
    # past the end of the frame.
    # NOTE(review): the sorted frame is passed without a reset index; if
    # make_html_df looks rows up by label rather than position, the cards will
    # not follow rank order — confirm.
    n_shown = min(_TOP_PAPERS, len(df_works))
    docs_html = "".join(make_html_df(df_works, i) for i in range(n_shown))

    G = oa.make_network(df_works)
    height = "750px"
    network = oa.show_network(G, color_by="rerank_score", notebook=False, height=height)
    network_html = _embed_network_in_iframe(network.generate_html(), height)

    docs = df_works["content"].head(_TOP_PAPERS).tolist()

    # First update: cards and network are ready, the summary has not started.
    yield docs_html, network_html, summary

    chain = make_rag_papers_chain(llm)
    result = chain.astream_log({"question": query, "docs": docs, "language": "English"})
    path_answer = "/logs/StrOutputParser/streamed_output/-"

    async for patch in result:
        # Scan every op in the patch, not just the first, so a matching token
        # at a later position is not dropped and an empty patch is harmless.
        new_tokens = [op["value"] for op in patch.ops if op["path"] == path_answer]
        if not new_tokens:
            continue
        summary += "".join(new_tokens)
        yield docs_html, network_html, summary
|
|