Update app.py
app.py CHANGED
@@ -10,6 +10,8 @@ import urllib.parse
 from tempfile import NamedTemporaryFile
 from typing import List
 from bs4 import BeautifulSoup
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_community.vectorstores import FAISS
 from langchain_community.document_loaders import PyPDFLoader
@@ -22,6 +24,7 @@ from langchain_core.documents import Document
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 
+
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 
 # Memory database to store question-answer pairs
@@ -302,13 +305,16 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
                 clean_content = article["title"]
 
             full_summary, cleaned_summary = summarize_news_content(clean_content, model)
+            relevance_score = calculate_relevance_score(cleaned_summary, model)
+
             processed_article = {
                 "published_date": article["published_date"],
                 "title": article["title"],
                 "url": article["url"],
                 "content": clean_content,
                 "summary": full_summary,
-                "cleaned_summary": cleaned_summary
+                "cleaned_summary": cleaned_summary,
+                "relevance_score": relevance_score
             }
             processed_articles.append(processed_article)
         except Exception as e:
@@ -321,7 +327,8 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
     docs = [Document(page_content=article["cleaned_summary"], metadata={
         "source": article["url"],
         "title": article["title"],
-        "published_date": article["published_date"]
+        "published_date": article["published_date"],
+        "relevance_score": article["relevance_score"]
     }) for article in processed_articles]
 
     try:
@@ -341,7 +348,6 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
     except Exception as e:
         return f"Error adding articles to the database: {str(e)}"
 
-
 def fetch_articles_from_page(url):
     response = requests.get(url)
     response.raise_for_status()
@@ -449,12 +455,45 @@ def export_news_to_excel():
     df['summary'] = df['cleaned_summary']
     df = df.drop(columns=['cleaned_summary'])  # Remove the extra column
 
+    # Reorder columns to put relevance_score after summary
+    columns = ['published_date', 'title', 'url', 'content', 'summary', 'relevance_score']
+    df = df[columns]
+
     with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
         excel_path = tmp.name
         df.to_excel(excel_path, index=False)
 
     return excel_path
 
+def calculate_relevance_score(summary, model):
+    prompt_template = PromptTemplate(
+        input_variables=["summary"],
+        template="""You are a financial analyst tasked with providing a relevance score to news summaries.
+The score should be based on the financial significance and impact of the news.
+Use the following scoring guide:
+- 0.00-0.20: Not relevant to finance or economics
+- 0.21-0.40: Slightly relevant, but minimal financial impact
+- 0.41-0.60: Moderately relevant, some financial implications
+- 0.61-0.80: Highly relevant, significant financial impact
+- 0.81-1.00: Extremely relevant, major financial implications
+
+Provide a score between 0.00 and 1.00, where 0.00 is not relevant at all, and 1.00 is extremely relevant from a financial perspective.
+
+Summary: {summary}
+
+Relevance Score:"""
+    )
+
+    chain = LLMChain(llm=model, prompt=prompt_template)
+    response = chain.run(summary=summary)
+
+    try:
+        score = float(response.strip())
+        return min(max(score, 0.00), 1.00)  # Ensure the score is between 0.00 and 1.00
+    except ValueError:
+        print(f"Error parsing relevance score: {response}")
+        return 0.00
+
 def ask_question(question, temperature, top_p, repetition_penalty, web_search, google_news_rss):
     global conversation_history
 
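A minimal way to smoke-test the new calculate_relevance_score path without a real endpoint is sketched below. It is illustrative only: it assumes the function from this commit is in scope (for example, copied into a scratch script together with its PromptTemplate/LLMChain imports), and it uses FakeListLLM from langchain_community purely as a stand-in for the app's model. The parse_relevance_score helper at the end is likewise only a suggestion for pulling a score out of wordier completions, since float(response.strip()) falls back to 0.00 whenever the model returns anything other than a bare number.

import re
from langchain_community.llms.fake import FakeListLLM  # stand-in LLM; any LangChain-compatible model works

# Canned completion so the scoring path runs offline; a real deployment would pass the app's model.
fake_model = FakeListLLM(responses=["0.72"])

# calculate_relevance_score is the function added in this commit (assumed to be in scope here).
score = calculate_relevance_score("Central bank raises its policy rate by 50 bps.", fake_model)
print(score)  # -> 0.72

# Optional, more forgiving parser: grabs the first number in a verbose completion
# such as "Relevance Score: 0.65 (significant impact)" instead of discarding it as 0.00.
def parse_relevance_score(response: str) -> float:
    match = re.search(r"\d+(?:\.\d+)?", response)
    return min(max(float(match.group()), 0.0), 1.0) if match else 0.0

print(parse_relevance_score("Relevance Score: 0.65 (significant impact)"))  # -> 0.65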